1.文件操作
位置在:util/env_posix.cc (以posix系统为例,基类Env同样是虚类,PosixEnv是其具体实现,还有还有别的如Windows版本的)
注意文件读取在多环境下并不线程安全,需要开发者调用时采取外部手段
文件顺序读如下:
read从文件当前位置读取指定字节数,skip为从当前位置跳过指定字节数
class PosixSequentialFile final : public SequentialFile {
public:
PosixSequentialFile(std::string filename, int fd)
: fd_(fd), filename_(std::move(filename)) {}
~PosixSequentialFile() override { close(fd_); }
Status Read(size_t n, Slice* result, char* scratch) override {
Status status;
while (true) {
::ssize_t read_size = ::read(fd_, scratch, n);
if (read_size < 0) { // Read error.
if (errno == EINTR) {
continue; // Retry
}
status = PosixError(filename_, errno);
break;
}
*result = Slice(scratch, read_size);
break;
}
return status;
}
Status Skip(uint64_t n) override {
if (::lseek(fd_, n, SEEK_CUR) == static_cast<off_t>(-1)) {
return PosixError(filename_, errno);
}
return Status::OK();
}
private:
const int fd_;
const std::string filename_;
};
文件顺序写如下:
append顺序写、close关闭、flush刷入内核缓冲区,sync才是真正写入磁盘
class PosixWritableFile final : public WritableFile {
public:
PosixWritableFile(std::string filename, int fd)
: pos_(0),
fd_(fd),
is_manifest_(IsManifest(filename)),
filename_(std::move(filename)),
dirname_(Dirname(filename_)) {}
~PosixWritableFile() override {
if (fd_ >= 0) {
// Ignoring any potential errors
Close();
}
}
Status Append(const Slice& data) override {
size_t write_size = data.size();
const char* write_data = data.data();
// Fit as much as possible into buffer.
size_t copy_size = std::min(write_size, kWritableFileBufferSize - pos_);
std::memcpy(buf_ + pos_, write_data, copy_size);
write_data += copy_size;
write_size -= copy_size;
pos_ += copy_size;
if (write_size == 0) {
return Status::OK();
}
// Can't fit in buffer, so need to do at least one write.
Status status = FlushBuffer();
if (!status.ok()) {
return status;
}
// Small writes go to buffer, large writes are written directly.
if (write_size < kWritableFileBufferSize) {
std::memcpy(buf_, write_data, write_size);
pos_ = write_size;
return Status::OK();
}
return WriteUnbuffered(write_data, write_size);
}
Status Close() override {
Status status = FlushBuffer();
const int close_result = ::close(fd_);
if (close_result < 0 && status.ok()) {
status = PosixError(filename_, errno);
}
fd_ = -1;
return status;
}
Status Flush() override { return FlushBuffer(); }
Status Sync() override {
// Ensure new files referred to by the manifest are in the filesystem.
//
// This needs to happen before the manifest file is flushed to disk, to
// avoid crashing in a state where the manifest refers to files that are not
// yet on disk.
Status status = SyncDirIfManifest();
if (!status.ok()) {
return status;
}
status = FlushBuffer();
if (!status.ok()) {
return status;
}
return SyncFd(fd_, filename_);
}
private:
Status FlushBuffer() {
Status status = WriteUnbuffered(buf_, pos_);
pos_ = 0;
return status;
}
Status WriteUnbuffered(const char* data, size_t size) {
while (size > 0) {
ssize_t write_result = ::write(fd_, data, size);
if (write_result < 0) {
if (errno == EINTR) {
continue; // Retry
}
return PosixError(filename_, errno);
}
data += write_result;
size -= write_result;
}
return Status::OK();
}
Status SyncDirIfManifest() {
Status status;
if (!is_manifest_) {
return status;
}
int fd = ::open(dirname_.c_str(), O_RDONLY | kOpenBaseFlags);
if (fd < 0) {
status = PosixError(dirname_, errno);
} else {
status = SyncFd(fd, dirname_);
::close(fd);
}
return status;
}
// Ensures that all the caches associated with the given file descriptor's
// data are flushed all the way to durable media, and can withstand power
// failures.
//
// The path argument is only used to populate the description string in the
// returned Status if an error occurs.
static Status SyncFd(int fd, const std::string& fd_path) {
#if HAVE_FULLFSYNC
// On macOS and iOS, fsync() doesn't guarantee durability past power
// failures. fcntl(F_FULLFSYNC) is required for that purpose. Some
// filesystems don't support fcntl(F_FULLFSYNC), and require a fallback to
// fsync().
if (::fcntl(fd, F_FULLFSYNC) == 0) {
return Status::OK();
}
#endif // HAVE_FULLFSYNC
#if HAVE_FDATASYNC
bool sync_success = ::fdatasync(fd) == 0;
#else
bool sync_success = ::fsync(fd) == 0;
#endif // HAVE_FDATASYNC
if (sync_success) {
return Status::OK();
}
return PosixError(fd_path, errno);
}
// Returns the directory name in a path pointing to a file.
//
// Returns "." if the path does not contain any directory separator.
static std::string Dirname(const std::string& filename) {
std::string::size_type separator_pos = filename.rfind('/');
if (separator_pos == std::string::npos) {
return std::string(".");
}
// The filename component should not contain a path separator. If it does,
// the splitting was done incorrectly.
assert(filename.find('/', separator_pos + 1) == std::string::npos);
return filename.substr(0, separator_pos);
}
// Extracts the file name from a path pointing to a file.
//
// The returned Slice points to |filename|'s data buffer, so it is only valid
// while |filename| is alive and unchanged.
static Slice Basename(const std::string& filename) {
std::string::size_type separator_pos = filename.rfind('/');
if (separator_pos == std::string::npos) {
return Slice(filename);
}
// The filename component should not contain a path separator. If it does,
// the splitting was done incorrectly.
assert(filename.find('/', separator_pos + 1) == std::string::npos);
return Slice(filename.data() + separator_pos + 1,
filename.length() - separator_pos - 1);
}
// True if the given file is a manifest file.
static bool IsManifest(const std::string& filename) {
return Basename(filename).starts_with("MANIFEST");
}
// buf_[0, pos_ - 1] contains data to be written to fd_.
char buf_[kWritableFileBufferSize];
size_t pos_;
int fd_;
const bool is_manifest_; // True if the file's name starts with MANIFEST.
const std::string filename_;
const std::string dirname_; // The directory of filename_.
};
随机读(随机操作没有随机写):
底层用pread(lseek和read结合的原子操作)从而保证原子性
class PosixRandomAccessFile final : public RandomAccessFile {
public:
// The new instance takes ownership of |fd|. |fd_limiter| must outlive this
// instance, and will be used to determine if .
PosixRandomAccessFile(std::string filename, int fd, Limiter* fd_limiter)
: has_permanent_fd_(fd_limiter->Acquire()),
fd_(has_permanent_fd_ ? fd : -1),
fd_limiter_(fd_limiter),
filename_(std::move(filename)) {
if (!has_permanent_fd_) {
assert(fd_ == -1);
::close(fd); // The file will be opened on every read.
}
}
~PosixRandomAccessFile() override {
if (has_permanent_fd_) {
assert(fd_ != -1);
::close(fd_);
fd_limiter_->Release();
}
}
Status Read(uint64_t offset, size_t n, Slice* result,
char* scratch) const override {
int fd = fd_;
if (!has_permanent_fd_) {
fd = ::open(filename_.c_str(), O_RDONLY | kOpenBaseFlags);
if (fd < 0) {
return PosixError(filename_, errno);
}
}
assert(fd != -1);
Status status;
ssize_t read_size = ::pread(fd, scratch, n, static_cast<off_t>(offset));
*result = Slice(scratch, (read_size < 0) ? 0 : read_size);
if (read_size < 0) {
// An error: return a non-ok status.
status = PosixError(filename_, errno);
}
if (!has_permanent_fd_) {
// Close the temporary file descriptor opened earlier.
assert(fd != fd_);
::close(fd);
}
return status;
}
private:
const bool has_permanent_fd_; // If false, the file is opened on every read.
const int fd_; // -1 if has_permanent_fd_ is false.
Limiter* const fd_limiter_;
const std::string filename_;
};
另一种使用内存映射文件
class PosixMmapReadableFile final : public RandomAccessFile {
public:
// mmap_base[0, length-1] points to the memory-mapped contents of the file. It
// must be the result of a successful call to mmap(). This instances takes
// over the ownership of the region.
//
// |mmap_limiter| must outlive this instance. The caller must have already
// acquired the right to use one mmap region, which will be released when this
// instance is destroyed.
PosixMmapReadableFile(std::string filename, char* mmap_base, size_t length,
Limiter* mmap_limiter)
: mmap_base_(mmap_base),
length_(length),
mmap_limiter_(mmap_limiter),
filename_(std::move(filename)) {}
~PosixMmapReadableFile() override {
::munmap(static_cast<void*>(mmap_base_), length_);
mmap_limiter_->Release();
}
Status Read(uint64_t offset, size_t n, Slice* result,
char* scratch) const override {
if (offset + n > length_) {
*result = Slice();
return PosixError(filename_, EINVAL);
}
*result = Slice(mmap_base_ + offset, n);
return Status::OK();
}
private:
char* const mmap_base_;
const size_t length_;
Limiter* const mmap_limiter_;
const std::string filename_;
};
log文件操作:
Logv先保存信息到缓存(一个数组),fwrite和fflush实现文件追加写入保存
class PosixLogger final : public Logger {
public:
// Creates a logger that writes to the given file.
//
// The PosixLogger instance takes ownership of the file handle.
explicit PosixLogger(std::FILE* fp) : fp_(fp) { assert(fp != nullptr); }
~PosixLogger() override { std::fclose(fp_); }
void Logv(const char* format, std::va_list arguments) override {
// Record the time as close to the Logv() call as possible.
struct ::timeval now_timeval;
::gettimeofday(&now_timeval, nullptr);
const std::time_t now_seconds = now_timeval.tv_sec;
struct std::tm now_components;
::localtime_r(&now_seconds, &now_components);
// Record the thread ID.
constexpr const int kMaxThreadIdSize = 32;
std::ostringstream thread_stream;
thread_stream << std::this_thread::get_id();
std::string thread_id = thread_stream.str();
if (thread_id.size() > kMaxThreadIdSize) {
thread_id.resize(kMaxThreadIdSize);
}
// We first attempt to print into a stack-allocated buffer. If this attempt
// fails, we make a second attempt with a dynamically allocated buffer.
constexpr const int kStackBufferSize = 512;
char stack_buffer[kStackBufferSize];
static_assert(sizeof(stack_buffer) == static_cast<size_t>(kStackBufferSize),
"sizeof(char) is expected to be 1 in C++");
int dynamic_buffer_size = 0; // Computed in the first iteration.
for (int iteration = 0; iteration < 2; ++iteration) {
const int buffer_size =
(iteration == 0) ? kStackBufferSize : dynamic_buffer_size;
char* const buffer =
(iteration == 0) ? stack_buffer : new char[dynamic_buffer_size];
// Print the header into the buffer.
int buffer_offset = std::snprintf(
buffer, buffer_size, "%04d/%02d/%02d-%02d:%02d:%02d.%06d %s ",
now_components.tm_year + 1900, now_components.tm_mon + 1,
now_components.tm_mday, now_components.tm_hour, now_components.tm_min,
now_components.tm_sec, static_cast<int>(now_timeval.tv_usec),
thread_id.c_str());
// The header can be at most 28 characters (10 date + 15 time +
// 3 delimiters) plus the thread ID, which should fit comfortably into the
// static buffer.
assert(buffer_offset <= 28 + kMaxThreadIdSize);
static_assert(28 + kMaxThreadIdSize < kStackBufferSize,
"stack-allocated buffer may not fit the message header");
assert(buffer_offset < buffer_size);
// Print the message into the buffer.
std::va_list arguments_copy;
va_copy(arguments_copy, arguments);
buffer_offset +=
std::vsnprintf(buffer + buffer_offset, buffer_size - buffer_offset,
format, arguments_copy);
va_end(arguments_copy);
// The code below may append a newline at the end of the buffer, which
// requires an extra character.
if (buffer_offset >= buffer_size - 1) {
// The message did not fit into the buffer.
if (iteration == 0) {
// Re-run the loop and use a dynamically-allocated buffer. The buffer
// will be large enough for the log message, an extra newline and a
// null terminator.
dynamic_buffer_size = buffer_offset + 2;
continue;
}
// The dynamically-allocated buffer was incorrectly sized. This should
// not happen, assuming a correct implementation of std::(v)snprintf.
// Fail in tests, recover by truncating the log message in production.
assert(false);
buffer_offset = buffer_size - 1;
}
// Add a newline if necessary.
if (buffer[buffer_offset - 1] != '\n') {
buffer[buffer_offset] = '\n';
++buffer_offset;
}
assert(buffer_offset <= buffer_size);
std::fwrite(buffer, 1, buffer_offset, fp_);
std::fflush(fp_);
if (iteration != 0) {
delete[] buffer;
}
break;
}
}
private:
std::FILE* const fp_;
};
2.内存管理
位置在:util/arena.h
为了c++内存的高效利用,leveldb设计了arena内存池
std::vector<char*> blocks_是固定大小的内存块,存指向内存块的指针
alloc_ptr_指向最新block的可用空间的起始地址
alloc_bytes_remaining_表示当前block剩余内存
allocate函数分配内存,allocatealigned作用同allocate区别在其内存首地址满足内存对齐(内存对齐指的是内存分配后alloc_ptr_的地址应该为b/8的倍数,b指的是32位或者64位)
allocate分两种情况,如果当前remaining足够就在当前块分配内存更新remaining和ptr,如果超过了就调用allocatefullback申请内存
allocatefallback用于申请一个新的block内存空间分小于1024字节和大于两种情况,如果小于则新建一个4096的块,如果大于比如需要2000字节,就申请一个2000字节的块(用多少申请多少),这是为了减少浪费
class Arena {
public:
Arena();
Arena(const Arena&) = delete;
Arena& operator=(const Arena&) = delete;
~Arena();
// Return a pointer to a newly allocated memory block of "bytes" bytes.
char* Allocate(size_t bytes);
// Allocate memory with the normal alignment guarantees provided by malloc.
char* AllocateAligned(size_t bytes);
// Returns an estimate of the total memory usage of data allocated
// by the arena.
size_t MemoryUsage() const {
return memory_usage_.load(std::memory_order_relaxed);
}
private:
char* AllocateFallback(size_t bytes);
char* AllocateNewBlock(size_t block_bytes);
// Allocation state
char* alloc_ptr_;
size_t alloc_bytes_remaining_;
// Array of new[] allocated memory blocks
std::vector<char*> blocks_;
// Total memory usage of the arena.
//
// TODO(costan): This member is accessed via atomics, but the others are
// accessed without any locking. Is this OK?
std::atomic<size_t> memory_usage_;
};
inline char* Arena::Allocate(size_t bytes) {
// The semantics of what to return are a bit messy if we allow
// 0-byte allocations, so we disallow them here (we don't need
// them for our internal use).
assert(bytes > 0);
if (bytes <= alloc_bytes_remaining_) {
char* result = alloc_ptr_;
alloc_ptr_ += bytes;
alloc_bytes_remaining_ -= bytes;
return result;
}
return AllocateFallback(bytes);
}
char* Arena::AllocateFallback(size_t bytes) {
if (bytes > kBlockSize / 4) {
// Object is more than a quarter of our block size. Allocate it separately
// to avoid wasting too much space in leftover bytes.
char* result = AllocateNewBlock(bytes);
return result;
}
// We waste the remaining space in the current block.
alloc_ptr_ = AllocateNewBlock(kBlockSize);
alloc_bytes_remaining_ = kBlockSize;
char* result = alloc_ptr_;
alloc_ptr_ += bytes;
alloc_bytes_remaining_ -= bytes;
return result;
}
char* Arena::AllocateAligned(size_t bytes) {
const int align = (sizeof(void*) > 8) ? sizeof(void*) : 8;
static_assert((align & (align - 1)) == 0,
"Pointer size should be a power of 2");
size_t current_mod = reinterpret_cast<uintptr_t>(alloc_ptr_) & (align - 1);
size_t slop = (current_mod == 0 ? 0 : align - current_mod);
size_t needed = bytes + slop;
char* result;
if (needed <= alloc_bytes_remaining_) {
result = alloc_ptr_ + slop;
alloc_ptr_ += needed;
alloc_bytes_remaining_ -= needed;
} else {
// AllocateFallback always returned aligned memory
result = AllocateFallback(bytes);
}
assert((reinterpret_cast<uintptr_t>(result) & (align - 1)) == 0);
return result;
}
char* Arena::AllocateNewBlock(size_t block_bytes) {
char* result = new char[block_bytes];
blocks_.push_back(result);
memory_usage_.fetch_add(block_bytes + sizeof(char*),
std::memory_order_relaxed);
return result;
}