write snapshot

This commit is contained in:
2024-05-04 03:49:33 +00:00
parent ec920f7255
commit 7253c68fce
11 changed files with 400 additions and 19 deletions

View File

@ -2,8 +2,12 @@
#define SNAP_SHOT_H
#include <string>
#include "map.hpp"
#include "storage/driver.h"
#include "vector.hpp"
class SnapShotManager {
bool has_connected = false;
sjtu::vector<DataDriverBase *> drivers;
public:
// For safety and simplicity, we delete all the copy/move constructor and copy/move assignment operator. Please
// manager it using smart pointer.
@ -11,5 +15,16 @@ class SnapShotManager {
SnapShotManager(SnapShotManager &&) = delete;
SnapShotManager &operator=(const SnapShotManager &) = delete;
SnapShotManager &operator=(SnapShotManager &&) = delete;
/**
* @brief connect to the data drivers
*
* @warning please ensure that the data drivers are valid and the data drivers are not changed during the life cycle.
* Meanwhile, the FileEntry collected from the data drivers should be valid during the life cycle.
*/
inline void Connect(sjtu::vector<DataDriverBase *> drivers_) {
if (has_connected) throw std::runtime_error("SnapShotManager has already connected to the data drivers");
drivers = std::move(drivers_);
has_connected = true;
}
};
#endif // SNAP_SHOT_H

View File

@ -1 +1 @@
add_library(storage STATIC src/disk_manager.cpp src/replacer.cpp src/buffer_pool_manager.cpp src/bpt.cpp)
add_library(storage STATIC src/disk_manager.cpp src/replacer.cpp src/buffer_pool_manager.cpp src/bpt.cpp src/driver.cpp)

View File

@ -3,10 +3,10 @@
#include <cassert>
#include <cstring>
#include <shared_mutex>
#include "vector.hpp"
#include "storage/bpt_page.hpp"
#include "storage/buffer_pool_manager.h"
#include "storage/config.h"
#include "vector.hpp"
/**
* @brief B+ Tree Indexer
* @warning The KeyType must can be stored byte by byte. As this is only the indexer, the type of value is always
@ -815,7 +815,7 @@ class BPlusTreeIndexer {
++siz;
return true;
}
bool Remove(const KeyType &key) { // Finish Design
bool Remove(const KeyType &key, b_plus_tree_value_index_t *value_removed = nullptr) { // Finish Design
#ifdef ENABLE_ADVANCED_FEATURE
std::unique_lock<std::shared_mutex> guard(latch);
#endif
@ -823,6 +823,9 @@ class BPlusTreeIndexer {
if (pos.is_end) return false;
if (key_cmp(key, pos.path.back().first.template As<PageType>()->data.p_data[pos.path.back().second].first))
return false;
if (!value_removed) {
*value_removed = pos.path.back().first.template As<PageType>()->data.p_data[pos.path.back().second].second;
}
RemoveEntryAt(pos);
--siz;
return true;

View File

@ -17,6 +17,7 @@ class DiskManager {
* for first_empty_page_id).
*/
public:
DiskManager() = delete;
explicit DiskManager(const std::string &file_path_, bool renew = false);
~DiskManager();
char *RawDataMemory();

View File

@ -0,0 +1,78 @@
#ifndef DISK_MAP_H
#define DISK_MAP_H
#include <algorithm>
#include <string>
#include <utility>
#include "storage/bpt.hpp"
#include "storage/buffer_pool_manager.h"
#include "storage/driver.h"
#include "storage/single_value_storage.hpp"
template <typename Key, typename Value, typename Compare = std::less<Key>>
class DiskMap : public DataDriverBase {
std::string index_file_identifier;
std::string index_file_path;
DiskManager *index_disk_manager;
BufferPoolManager *index_bpm;
BPlusTreeIndexer<Key, Compare> *indexer;
std::string data_file_identifier;
std::string data_file_path;
DiskManager *data_disk_manager;
BufferPoolManager *data_bpm;
SingleValueStorage<Value> *data_storage;
public:
// for satety, all the copy/move operations are deleted, please manage it using pointer
DiskMap(std::string index_file_identifier_, std::string index_file_path_, std::string data_file_identifier_,
std::string data_file_path_)
: index_file_identifier(std::move(index_file_identifier_)),
index_file_path(std::move(index_file_path_)),
data_file_identifier(std::move(data_file_identifier_)),
data_file_path(std::move(data_file_path_)) {
index_disk_manager = new DiskManager(index_file_path);
index_bpm = new BufferPoolManager(100, 5, index_disk_manager);
indexer = new BPlusTreeIndexer<Key, Compare>(index_bpm);
data_disk_manager = new DiskManager(data_file_path);
data_bpm = new BufferPoolManager(100, 5, data_disk_manager);
data_storage = new SingleValueStorage<Value>(data_bpm);
}
~DiskMap() {
delete indexer;
delete index_bpm;
delete index_disk_manager;
delete data_storage;
delete data_bpm;
delete data_disk_manager;
}
DiskMap(const DiskMap &) = delete;
DiskMap &operator=(const DiskMap &) = delete;
DiskMap(DiskMap &&) = delete;
DiskMap &operator=(DiskMap &&) = delete;
virtual sjtu::vector<FileEntry> ListFiles() override {
sjtu::vector<FileEntry> res;
res.push_back({index_file_identifier, index_file_path, index_disk_manager});
res.push_back({data_file_identifier, data_file_path, data_disk_manager});
return res;
}
Value Get(const Key &key) {
size_t data_id;
if ((data_id = indexer->Get(key)) == kInvalidValueIndex) throw std::runtime_error("Key not found");
Value res;
data_storage->read(res, data_id);
return res;
}
bool Remove(const Key &key) {
b_plus_tree_value_index_t data_id;
bool remove_success = indexer->Remove(key, &data_id);
if (!remove_success) return false;
data_storage->Delete(data_id);
return true;
}
bool Put(const Key &key, Value &value) {
if (indexer->Put(key, data_storage->preview_next_blank())) {
data_storage->write(value);
return true;
}
return false;
}
};
#endif // DISK_MAP_H

View File

@ -0,0 +1,17 @@
#ifndef DRIVER_H
#define DRIVER_H
#include <string>
#include "storage/disk_manager.h"
#include "vector.hpp"
class DataDriverBase {
public:
struct FileEntry {
std::string identifier;
std::string path;
DiskManager *disk_manager;
};
DataDriverBase() = default;
virtual ~DataDriverBase() = default;
virtual sjtu::vector<FileEntry> ListFiles() = 0;
};
#endif // DRIVER_H

View File

@ -0,0 +1,125 @@
#ifndef SINGLE_VALUE_STORAGE_HPP
#define SINGLE_VALUE_STORAGE_HPP
#include <string>
#include "storage/buffer_pool_manager.h"
#include "storage/config.h"
#include "storage/disk_manager.h"
template <class T, int info_len = 2>
class SingleValueStorage {
private:
struct ElementPair {
T data;
size_t nxt_blank;
};
const static size_t max_element_in_page = (4096 - sizeof(size_t)) / sizeof(ElementPair);
struct DataType {
size_t elements_count;
ElementPair elements[max_element_in_page];
};
union Page {
DataType dat;
char filler[4096];
};
// data_id = frame_id * max_element_in_page + element_id
BufferPoolManager *bpm;
size_t first_blank_element_pair_id;
char *raw_mem;
static_assert(info_len * sizeof(int) <= 4000, "info_len should be less than 4000");
static_assert(sizeof(T) <= 4088, "T should be less than 4088");
void CloseFile() {
memcpy(raw_mem, &first_blank_element_pair_id, sizeof(size_t));
bpm->FlushAllPages();
bpm = nullptr;
}
public:
SingleValueStorage() = delete;
SingleValueStorage(const SingleValueStorage &) = delete;
SingleValueStorage(SingleValueStorage &&) = delete;
SingleValueStorage &operator=(const SingleValueStorage &) = delete;
SingleValueStorage &operator=(SingleValueStorage &&) = delete;
SingleValueStorage(BufferPoolManager *bpm) : bpm(bpm) {
raw_mem = bpm->RawDataMemory();
memcpy(&first_blank_element_pair_id, raw_mem, sizeof(size_t));
}
~SingleValueStorage() {
if (bpm != nullptr) CloseFile();
}
void get_info(int &tmp, int n) {
if (n > info_len) return;
n += 2;
memcpy(&tmp, raw_mem + n * sizeof(int), sizeof(int));
}
void write_info(int tmp, int n) {
if (n > info_len) return;
n += 2;
memcpy(raw_mem + n * sizeof(int), &tmp, sizeof(int));
}
size_t preview_next_blank() {
if (first_blank_element_pair_id != 0) return first_blank_element_pair_id;
frame_id_t frame_id;
BasicPageGuard guard = bpm->NewPageGuarded(&frame_id);
first_blank_element_pair_id = frame_id * max_element_in_page;
for (size_t i = 0; i < max_element_in_page - 1; i++) {
guard.AsMut<Page>()->dat.elements[i].nxt_blank = first_blank_element_pair_id + i + 1;
}
guard.AsMut<Page>()->dat.elements[max_element_in_page - 1].nxt_blank = 0;
guard.AsMut<Page>()->dat.elements_count = 0;
return first_blank_element_pair_id;
}
int write(T &t) {
size_t element_id = first_blank_element_pair_id;
size_t res_id = 0;
if (element_id != 0) {
res_id = element_id;
frame_id_t frame_id = element_id / max_element_in_page;
element_id %= max_element_in_page;
WritePageGuard guard = bpm->FetchPageWrite(frame_id);
first_blank_element_pair_id = guard.AsMut<Page>()->dat.elements[element_id].nxt_blank;
guard.AsMut<Page>()->dat.elements[element_id].data = t;
guard.AsMut<Page>()->dat.elements_count++;
} else {
frame_id_t frame_id;
BasicPageGuard guard = bpm->NewPageGuarded(&frame_id);
guard.AsMut<Page>()->dat.elements[0].data = t;
element_id = frame_id * max_element_in_page;
res_id = element_id;
if (max_element_in_page > 1) first_blank_element_pair_id = element_id + 1;
for (size_t i = 1; i < max_element_in_page - 1; i++) {
guard.AsMut<Page>()->dat.elements[i].nxt_blank = element_id + i + 1;
}
guard.AsMut<Page>()->dat.elements[max_element_in_page - 1].nxt_blank = 0;
guard.AsMut<Page>()->dat.elements_count = 1;
}
return res_id;
}
void update(T &t, const int index) {
size_t frame_id = index / max_element_in_page;
WritePageGuard guard = bpm->FetchPageWrite(frame_id);
guard.AsMut<Page>()->dat.elements[index % max_element_in_page].data = t;
}
//读出位置索引index对应的T对象的值并赋值给t保证调用的index都是由write函数产生
void read(T &t, const int index) {
size_t frame_id = index / max_element_in_page;
ReadPageGuard guard = bpm->FetchPageRead(frame_id);
t = guard.As<Page>()->dat.elements[index % max_element_in_page].data;
}
//删除位置索引index对应的对象(不涉及空间回收时,可忽略此函数)保证调用的index都是由write函数产生
void Delete(int index) {
size_t frame_id = index / max_element_in_page;
WritePageGuard guard = bpm->FetchPageWrite(frame_id);
size_t element_id = index % max_element_in_page;
guard.AsMut<Page>()->dat.elements[element_id].nxt_blank = first_blank_element_pair_id;
first_blank_element_pair_id = index;
guard.AsMut<Page>()->dat.elements_count--;
}
};
#endif // SINGLE_VALUE_STORAGE_HPP

View File

@ -19,3 +19,5 @@ add_executable(t1_mk t1_mk.cpp)
set_target_properties(t1_mk PROPERTIES RUNTIME_OUTPUT_DIRECTORY ${CMAKE_BINARY_DIR})
add_executable(bpt_advanced_test bpt_advanced_test.cpp)
target_link_libraries(bpt_advanced_test storage GTest::gtest_main spdlog::spdlog)
add_executable(snapshot_test snapshot_test.cpp)
target_link_libraries(snapshot_test storage dataguard GTest::gtest_main spdlog::spdlog)

View File

@ -16,16 +16,27 @@ using std::string;
template <class T, int info_len = 2>
class MemoryRiver {
private:
union Page {
struct ElementPair {
T data;
size_t nxt_blank;
};
const static size_t max_element_in_page = (4096 - sizeof(size_t)) / sizeof(ElementPair);
struct DataType {
size_t elements_count;
ElementPair elements[max_element_in_page];
};
union Page {
DataType dat;
char filler[4096];
};
// data_id = frame_id * max_element_in_page + element_id
std::string file_name;
DiskManager *disk_manager;
BufferPoolManager *bpm;
size_t first_blank_element_pair_id;
char *raw_mem;
static_assert(info_len * sizeof(int) <= 4000, "info_len should be less than 4000");
static_assert(sizeof(T) <= 4096, "T should be less than 4096");
static_assert(sizeof(T) <= 4088, "T should be less than 4088");
public:
MemoryRiver() : disk_manager(nullptr), bpm(nullptr), file_name("") {}
@ -34,8 +45,10 @@ class MemoryRiver {
disk_manager = new DiskManager(file_name);
bpm = new BufferPoolManager(100, 5, disk_manager);
raw_mem = bpm->RawDataMemory();
memcpy(&first_blank_element_pair_id, raw_mem, sizeof(size_t));
}
void CloseFile() {
memcpy(raw_mem, &first_blank_element_pair_id, sizeof(size_t));
bpm->FlushAllPages();
file_name = "";
delete bpm;
@ -49,50 +62,84 @@ class MemoryRiver {
void initialise(string FN = "") {
if (file_name != "") {
std::string name_bak=file_name;
std::string name_bak = file_name;
CloseFile();
file_name = name_bak;
}
if (FN != "") file_name = FN;
if (file_name == "") return;
disk_manager = new DiskManager(file_name);
disk_manager = new DiskManager(file_name, true);
bpm = new BufferPoolManager(100, 5, disk_manager);
raw_mem = bpm->RawDataMemory();
memset(raw_mem, 0, bpm->RawDatMemorySize());
first_blank_element_pair_id = 0;
}
void get_info(int &tmp, int n) {
if (n > info_len) return;
n--;
n += 2;
memcpy(&tmp, raw_mem + n * sizeof(int), sizeof(int));
}
void write_info(int tmp, int n) {
if (n > info_len) return;
n--;
n += 2;
memcpy(raw_mem + n * sizeof(int), &tmp, sizeof(int));
}
int write(T &t) {
frame_id_t frame_id;
BasicPageGuard guard = bpm->NewPageGuarded(&frame_id);
guard.AsMut<Page>()->data = t;
return frame_id;
size_t element_id = first_blank_element_pair_id;
size_t res_id = 0;
if (element_id != 0) {
res_id = element_id;
frame_id_t frame_id = element_id / max_element_in_page;
element_id %= max_element_in_page;
WritePageGuard guard = bpm->FetchPageWrite(frame_id);
first_blank_element_pair_id = guard.AsMut<Page>()->dat.elements[element_id].nxt_blank;
guard.AsMut<Page>()->dat.elements[element_id].data = t;
guard.AsMut<Page>()->dat.elements_count++;
} else {
frame_id_t frame_id;
BasicPageGuard guard = bpm->NewPageGuarded(&frame_id);
guard.AsMut<Page>()->dat.elements[0].data = t;
element_id = frame_id * max_element_in_page;
res_id = element_id;
if (max_element_in_page > 1) first_blank_element_pair_id = element_id + 1;
for (size_t i = 1; i < max_element_in_page - 1; i++) {
guard.AsMut<Page>()->dat.elements[i].nxt_blank = element_id + i + 1;
}
guard.AsMut<Page>()->dat.elements[max_element_in_page - 1].nxt_blank = 0;
guard.AsMut<Page>()->dat.elements_count = 1;
}
return res_id;
}
void update(T &t, const int index) {
WritePageGuard guard = bpm->FetchPageWrite(index);
guard.AsMut<Page>()->data = t;
size_t frame_id = index / max_element_in_page;
WritePageGuard guard = bpm->FetchPageWrite(frame_id);
guard.AsMut<Page>()->dat.elements[index % max_element_in_page].data = t;
}
//读出位置索引index对应的T对象的值并赋值给t保证调用的index都是由write函数产生
void read(T &t, const int index) {
ReadPageGuard guard = bpm->FetchPageRead(index);
t = guard.As<Page>()->data;
size_t frame_id = index / max_element_in_page;
ReadPageGuard guard = bpm->FetchPageRead(frame_id);
t = guard.As<Page>()->dat.elements[index % max_element_in_page].data;
}
//删除位置索引index对应的对象(不涉及空间回收时,可忽略此函数)保证调用的index都是由write函数产生
void Delete(int index) { bpm->DeletePage(index); }
void Delete(int index) {
size_t frame_id = index / max_element_in_page;
WritePageGuard guard = bpm->FetchPageWrite(frame_id);
size_t element_id = index % max_element_in_page;
guard.AsMut<Page>()->dat.elements[element_id].nxt_blank = first_blank_element_pair_id;
first_blank_element_pair_id = index;
guard.AsMut<Page>()->dat.elements_count--;
// if (guard.AsMut<Page>()->dat.elements_count == 0) {
// guard.Drop();
// bpm->DeletePage(frame_id);
// }
}
};
#endif // BPT_MEMORYRIVER_HPP

93
test/snapshot_test.cpp Normal file
View File

@ -0,0 +1,93 @@
#include <gtest/gtest.h>
#include <spdlog/async.h>
#include <spdlog/sinks/basic_file_sink.h>
#include <spdlog/sinks/stdout_color_sinks.h>
#include <spdlog/spdlog.h>
#include <algorithm>
#include <deque>
#include <map>
#include <random>
#include <set>
#include "dataguard/dataguard.h"
#include "storage/disk_map.hpp"
namespace SnapShotTest {
template <typename Key>
struct KeysContainerForGenerator {
std::deque<Key> keys_list;
std::set<Key> keys_set;
bool IsIn(const Key &key) const { return keys_set.find(key) != keys_set.end(); }
template <typename random_generator_t>
Key GetRandomKey(random_generator_t &rnd) {
return keys_list[rnd() % keys_list.size()];
}
void AddKey(const Key &key) {
if (IsIn(key)) return;
keys_list.insert(std::lower_bound(keys_list.begin(), keys_list.end(), key), key);
keys_set.insert(key);
}
void RemoveKey(const Key &key) {
if (!IsIn(key)) return;
keys_list.erase(std::lower_bound(keys_list.begin(), keys_list.end(), key));
keys_set.erase(key);
}
size_t Size() const { return keys_list.size(); }
};
} // namespace SnapShotTest
TEST(Hello, World) { return; }
TEST(Basic, DiskMap) {
using namespace SnapShotTest;
const unsigned int RndSeed = testing::GTEST_FLAG(random_seed);
std::mt19937 rnd(RndSeed);
KeysContainerForGenerator<int> keys_container;
std::map<int, int> std_map;
remove("/tmp/index.db");
remove("/tmp/data.db");
const int total_opts = 10;
{
DiskMap<int, int> disk_map("index", "/tmp/index.db", "data", "/tmp/data.db");
for (int i = 0; i < total_opts; i++) {
int opt_id = rnd() % 100;
if (opt_id <= 30) {
if (keys_container.Size() > 0 && rnd() % 5 <= 2) {
// overrite and existing key
int key = keys_container.GetRandomKey(rnd);
int val = rnd() % 1000000;
std_map[key] = val;
disk_map.Put(key, val);
} else {
// insert a new key
int key = rnd() % 1000000;
int val = rnd() % 1000000;
keys_container.AddKey(key);
std_map[key] = val;
disk_map.Put(key, val);
}
} else if (opt_id <= 60) {
if (keys_container.Size() > 0 && rnd() % 5 <= 2) {
// delete an existing key
int key = keys_container.GetRandomKey(rnd);
keys_container.RemoveKey(key);
std_map.erase(key);
disk_map.Remove(key);
} else {
// delete a non-existing key
int key = rnd() % 1000000;
keys_container.RemoveKey(key);
std_map.erase(key);
disk_map.Remove(key);
}
} else {
if (keys_container.Size() == 0) continue;
int key = keys_container.GetRandomKey(rnd);
int val = disk_map.Get(key);
if (std_map.find(key) == std_map.end()) {
ASSERT_EQ(val, -1);
} else {
ASSERT_EQ(val, std_map[key]);
}
}
}
}
}