From 7253c68fcea7c27dcd6fc77e5033d7818eb75a94 Mon Sep 17 00:00:00 2001 From: happyZYM Date: Sat, 4 May 2024 03:49:33 +0000 Subject: [PATCH] write snapshot --- dataguard/include/dataguard/snapshot.h | 15 +++ storage/CMakeLists.txt | 2 +- storage/include/storage/bpt.hpp | 7 +- storage/include/storage/disk_manager.h | 1 + storage/include/storage/disk_map.hpp | 78 +++++++++++ storage/include/storage/driver.h | 17 +++ .../include/storage/single_value_storage.hpp | 125 ++++++++++++++++++ .../driver.cpp} | 0 test/CMakeLists.txt | 4 +- test/MemoryRiver.hpp | 77 ++++++++--- test/snapshot_test.cpp | 93 +++++++++++++ 11 files changed, 400 insertions(+), 19 deletions(-) create mode 100644 storage/include/storage/driver.h rename storage/{include/storage/skip_list_value_storage.hpp => src/driver.cpp} (100%) create mode 100644 test/snapshot_test.cpp diff --git a/dataguard/include/dataguard/snapshot.h b/dataguard/include/dataguard/snapshot.h index 0bd7cc0..c8803f5 100644 --- a/dataguard/include/dataguard/snapshot.h +++ b/dataguard/include/dataguard/snapshot.h @@ -2,8 +2,12 @@ #define SNAP_SHOT_H #include #include "map.hpp" +#include "storage/driver.h" #include "vector.hpp" class SnapShotManager { + bool has_connected = false; + sjtu::vector drivers; + public: // For safety and simplicity, we delete all the copy/move constructor and copy/move assignment operator. Please // manager it using smart pointer. @@ -11,5 +15,16 @@ class SnapShotManager { SnapShotManager(SnapShotManager &&) = delete; SnapShotManager &operator=(const SnapShotManager &) = delete; SnapShotManager &operator=(SnapShotManager &&) = delete; + /** + * @brief connect to the data drivers + * + * @warning please ensure that the data drivers are valid and the data drivers are not changed during the life cycle. + * Meanwhile, the FileEntry collected from the data drivers should be valid during the life cycle. + */ + inline void Connect(sjtu::vector drivers_) { + if (has_connected) throw std::runtime_error("SnapShotManager has already connected to the data drivers"); + drivers = std::move(drivers_); + has_connected = true; + } }; #endif // SNAP_SHOT_H \ No newline at end of file diff --git a/storage/CMakeLists.txt b/storage/CMakeLists.txt index 41afa27..c768416 100644 --- a/storage/CMakeLists.txt +++ b/storage/CMakeLists.txt @@ -1 +1 @@ -add_library(storage STATIC src/disk_manager.cpp src/replacer.cpp src/buffer_pool_manager.cpp src/bpt.cpp) \ No newline at end of file +add_library(storage STATIC src/disk_manager.cpp src/replacer.cpp src/buffer_pool_manager.cpp src/bpt.cpp src/driver.cpp) \ No newline at end of file diff --git a/storage/include/storage/bpt.hpp b/storage/include/storage/bpt.hpp index ded4ff2..52bff67 100644 --- a/storage/include/storage/bpt.hpp +++ b/storage/include/storage/bpt.hpp @@ -3,10 +3,10 @@ #include #include #include -#include "vector.hpp" #include "storage/bpt_page.hpp" #include "storage/buffer_pool_manager.h" #include "storage/config.h" +#include "vector.hpp" /** * @brief B+ Tree Indexer * @warning The KeyType must can be stored byte by byte. As this is only the indexer, the type of value is always @@ -815,7 +815,7 @@ class BPlusTreeIndexer { ++siz; return true; } - bool Remove(const KeyType &key) { // Finish Design + bool Remove(const KeyType &key, b_plus_tree_value_index_t *value_removed = nullptr) { // Finish Design #ifdef ENABLE_ADVANCED_FEATURE std::unique_lock guard(latch); #endif @@ -823,6 +823,9 @@ class BPlusTreeIndexer { if (pos.is_end) return false; if (key_cmp(key, pos.path.back().first.template As()->data.p_data[pos.path.back().second].first)) return false; + if (!value_removed) { + *value_removed = pos.path.back().first.template As()->data.p_data[pos.path.back().second].second; + } RemoveEntryAt(pos); --siz; return true; diff --git a/storage/include/storage/disk_manager.h b/storage/include/storage/disk_manager.h index cd86587..fe071e6 100644 --- a/storage/include/storage/disk_manager.h +++ b/storage/include/storage/disk_manager.h @@ -17,6 +17,7 @@ class DiskManager { * for first_empty_page_id). */ public: + DiskManager() = delete; explicit DiskManager(const std::string &file_path_, bool renew = false); ~DiskManager(); char *RawDataMemory(); diff --git a/storage/include/storage/disk_map.hpp b/storage/include/storage/disk_map.hpp index e69de29..3b3c401 100644 --- a/storage/include/storage/disk_map.hpp +++ b/storage/include/storage/disk_map.hpp @@ -0,0 +1,78 @@ +#ifndef DISK_MAP_H +#define DISK_MAP_H +#include +#include +#include +#include "storage/bpt.hpp" +#include "storage/buffer_pool_manager.h" +#include "storage/driver.h" +#include "storage/single_value_storage.hpp" +template > +class DiskMap : public DataDriverBase { + std::string index_file_identifier; + std::string index_file_path; + DiskManager *index_disk_manager; + BufferPoolManager *index_bpm; + BPlusTreeIndexer *indexer; + std::string data_file_identifier; + std::string data_file_path; + DiskManager *data_disk_manager; + BufferPoolManager *data_bpm; + SingleValueStorage *data_storage; + + public: + // for satety, all the copy/move operations are deleted, please manage it using pointer + DiskMap(std::string index_file_identifier_, std::string index_file_path_, std::string data_file_identifier_, + std::string data_file_path_) + : index_file_identifier(std::move(index_file_identifier_)), + index_file_path(std::move(index_file_path_)), + data_file_identifier(std::move(data_file_identifier_)), + data_file_path(std::move(data_file_path_)) { + index_disk_manager = new DiskManager(index_file_path); + index_bpm = new BufferPoolManager(100, 5, index_disk_manager); + indexer = new BPlusTreeIndexer(index_bpm); + data_disk_manager = new DiskManager(data_file_path); + data_bpm = new BufferPoolManager(100, 5, data_disk_manager); + data_storage = new SingleValueStorage(data_bpm); + } + ~DiskMap() { + delete indexer; + delete index_bpm; + delete index_disk_manager; + delete data_storage; + delete data_bpm; + delete data_disk_manager; + } + DiskMap(const DiskMap &) = delete; + DiskMap &operator=(const DiskMap &) = delete; + DiskMap(DiskMap &&) = delete; + DiskMap &operator=(DiskMap &&) = delete; + virtual sjtu::vector ListFiles() override { + sjtu::vector res; + res.push_back({index_file_identifier, index_file_path, index_disk_manager}); + res.push_back({data_file_identifier, data_file_path, data_disk_manager}); + return res; + } + Value Get(const Key &key) { + size_t data_id; + if ((data_id = indexer->Get(key)) == kInvalidValueIndex) throw std::runtime_error("Key not found"); + Value res; + data_storage->read(res, data_id); + return res; + } + bool Remove(const Key &key) { + b_plus_tree_value_index_t data_id; + bool remove_success = indexer->Remove(key, &data_id); + if (!remove_success) return false; + data_storage->Delete(data_id); + return true; + } + bool Put(const Key &key, Value &value) { + if (indexer->Put(key, data_storage->preview_next_blank())) { + data_storage->write(value); + return true; + } + return false; + } +}; +#endif // DISK_MAP_H \ No newline at end of file diff --git a/storage/include/storage/driver.h b/storage/include/storage/driver.h new file mode 100644 index 0000000..5e2caab --- /dev/null +++ b/storage/include/storage/driver.h @@ -0,0 +1,17 @@ +#ifndef DRIVER_H +#define DRIVER_H +#include +#include "storage/disk_manager.h" +#include "vector.hpp" +class DataDriverBase { + public: + struct FileEntry { + std::string identifier; + std::string path; + DiskManager *disk_manager; + }; + DataDriverBase() = default; + virtual ~DataDriverBase() = default; + virtual sjtu::vector ListFiles() = 0; +}; +#endif // DRIVER_H \ No newline at end of file diff --git a/storage/include/storage/single_value_storage.hpp b/storage/include/storage/single_value_storage.hpp index e69de29..c60d635 100644 --- a/storage/include/storage/single_value_storage.hpp +++ b/storage/include/storage/single_value_storage.hpp @@ -0,0 +1,125 @@ +#ifndef SINGLE_VALUE_STORAGE_HPP +#define SINGLE_VALUE_STORAGE_HPP +#include +#include "storage/buffer_pool_manager.h" +#include "storage/config.h" +#include "storage/disk_manager.h" +template +class SingleValueStorage { + private: + struct ElementPair { + T data; + size_t nxt_blank; + }; + const static size_t max_element_in_page = (4096 - sizeof(size_t)) / sizeof(ElementPair); + struct DataType { + size_t elements_count; + ElementPair elements[max_element_in_page]; + }; + union Page { + DataType dat; + char filler[4096]; + }; + // data_id = frame_id * max_element_in_page + element_id + BufferPoolManager *bpm; + size_t first_blank_element_pair_id; + char *raw_mem; + static_assert(info_len * sizeof(int) <= 4000, "info_len should be less than 4000"); + static_assert(sizeof(T) <= 4088, "T should be less than 4088"); + void CloseFile() { + memcpy(raw_mem, &first_blank_element_pair_id, sizeof(size_t)); + bpm->FlushAllPages(); + bpm = nullptr; + } + + public: + SingleValueStorage() = delete; + SingleValueStorage(const SingleValueStorage &) = delete; + SingleValueStorage(SingleValueStorage &&) = delete; + SingleValueStorage &operator=(const SingleValueStorage &) = delete; + SingleValueStorage &operator=(SingleValueStorage &&) = delete; + + SingleValueStorage(BufferPoolManager *bpm) : bpm(bpm) { + raw_mem = bpm->RawDataMemory(); + memcpy(&first_blank_element_pair_id, raw_mem, sizeof(size_t)); + } + + ~SingleValueStorage() { + if (bpm != nullptr) CloseFile(); + } + + void get_info(int &tmp, int n) { + if (n > info_len) return; + n += 2; + memcpy(&tmp, raw_mem + n * sizeof(int), sizeof(int)); + } + + void write_info(int tmp, int n) { + if (n > info_len) return; + n += 2; + memcpy(raw_mem + n * sizeof(int), &tmp, sizeof(int)); + } + + size_t preview_next_blank() { + if (first_blank_element_pair_id != 0) return first_blank_element_pair_id; + frame_id_t frame_id; + BasicPageGuard guard = bpm->NewPageGuarded(&frame_id); + first_blank_element_pair_id = frame_id * max_element_in_page; + for (size_t i = 0; i < max_element_in_page - 1; i++) { + guard.AsMut()->dat.elements[i].nxt_blank = first_blank_element_pair_id + i + 1; + } + guard.AsMut()->dat.elements[max_element_in_page - 1].nxt_blank = 0; + guard.AsMut()->dat.elements_count = 0; + return first_blank_element_pair_id; + } + int write(T &t) { + size_t element_id = first_blank_element_pair_id; + size_t res_id = 0; + if (element_id != 0) { + res_id = element_id; + frame_id_t frame_id = element_id / max_element_in_page; + element_id %= max_element_in_page; + WritePageGuard guard = bpm->FetchPageWrite(frame_id); + first_blank_element_pair_id = guard.AsMut()->dat.elements[element_id].nxt_blank; + guard.AsMut()->dat.elements[element_id].data = t; + guard.AsMut()->dat.elements_count++; + } else { + frame_id_t frame_id; + BasicPageGuard guard = bpm->NewPageGuarded(&frame_id); + guard.AsMut()->dat.elements[0].data = t; + element_id = frame_id * max_element_in_page; + res_id = element_id; + if (max_element_in_page > 1) first_blank_element_pair_id = element_id + 1; + for (size_t i = 1; i < max_element_in_page - 1; i++) { + guard.AsMut()->dat.elements[i].nxt_blank = element_id + i + 1; + } + guard.AsMut()->dat.elements[max_element_in_page - 1].nxt_blank = 0; + guard.AsMut()->dat.elements_count = 1; + } + return res_id; + } + + void update(T &t, const int index) { + size_t frame_id = index / max_element_in_page; + WritePageGuard guard = bpm->FetchPageWrite(frame_id); + guard.AsMut()->dat.elements[index % max_element_in_page].data = t; + } + + //读出位置索引index对应的T对象的值并赋值给t,保证调用的index都是由write函数产生 + void read(T &t, const int index) { + size_t frame_id = index / max_element_in_page; + ReadPageGuard guard = bpm->FetchPageRead(frame_id); + t = guard.As()->dat.elements[index % max_element_in_page].data; + } + + //删除位置索引index对应的对象(不涉及空间回收时,可忽略此函数),保证调用的index都是由write函数产生 + void Delete(int index) { + size_t frame_id = index / max_element_in_page; + WritePageGuard guard = bpm->FetchPageWrite(frame_id); + size_t element_id = index % max_element_in_page; + guard.AsMut()->dat.elements[element_id].nxt_blank = first_blank_element_pair_id; + first_blank_element_pair_id = index; + guard.AsMut()->dat.elements_count--; + } +}; +#endif // SINGLE_VALUE_STORAGE_HPP \ No newline at end of file diff --git a/storage/include/storage/skip_list_value_storage.hpp b/storage/src/driver.cpp similarity index 100% rename from storage/include/storage/skip_list_value_storage.hpp rename to storage/src/driver.cpp diff --git a/test/CMakeLists.txt b/test/CMakeLists.txt index 77aac08..072b113 100644 --- a/test/CMakeLists.txt +++ b/test/CMakeLists.txt @@ -18,4 +18,6 @@ set_target_properties(t1_std PROPERTIES RUNTIME_OUTPUT_DIRECTORY ${CMAKE_BINARY_ add_executable(t1_mk t1_mk.cpp) set_target_properties(t1_mk PROPERTIES RUNTIME_OUTPUT_DIRECTORY ${CMAKE_BINARY_DIR}) add_executable(bpt_advanced_test bpt_advanced_test.cpp) -target_link_libraries(bpt_advanced_test storage GTest::gtest_main spdlog::spdlog) \ No newline at end of file +target_link_libraries(bpt_advanced_test storage GTest::gtest_main spdlog::spdlog) +add_executable(snapshot_test snapshot_test.cpp) +target_link_libraries(snapshot_test storage dataguard GTest::gtest_main spdlog::spdlog) \ No newline at end of file diff --git a/test/MemoryRiver.hpp b/test/MemoryRiver.hpp index 40b3267..9d35dd6 100644 --- a/test/MemoryRiver.hpp +++ b/test/MemoryRiver.hpp @@ -16,16 +16,27 @@ using std::string; template class MemoryRiver { private: - union Page { + struct ElementPair { T data; + size_t nxt_blank; + }; + const static size_t max_element_in_page = (4096 - sizeof(size_t)) / sizeof(ElementPair); + struct DataType { + size_t elements_count; + ElementPair elements[max_element_in_page]; + }; + union Page { + DataType dat; char filler[4096]; }; + // data_id = frame_id * max_element_in_page + element_id std::string file_name; DiskManager *disk_manager; BufferPoolManager *bpm; + size_t first_blank_element_pair_id; char *raw_mem; static_assert(info_len * sizeof(int) <= 4000, "info_len should be less than 4000"); - static_assert(sizeof(T) <= 4096, "T should be less than 4096"); + static_assert(sizeof(T) <= 4088, "T should be less than 4088"); public: MemoryRiver() : disk_manager(nullptr), bpm(nullptr), file_name("") {} @@ -34,8 +45,10 @@ class MemoryRiver { disk_manager = new DiskManager(file_name); bpm = new BufferPoolManager(100, 5, disk_manager); raw_mem = bpm->RawDataMemory(); + memcpy(&first_blank_element_pair_id, raw_mem, sizeof(size_t)); } void CloseFile() { + memcpy(raw_mem, &first_blank_element_pair_id, sizeof(size_t)); bpm->FlushAllPages(); file_name = ""; delete bpm; @@ -49,50 +62,84 @@ class MemoryRiver { void initialise(string FN = "") { if (file_name != "") { - std::string name_bak=file_name; + std::string name_bak = file_name; CloseFile(); file_name = name_bak; } if (FN != "") file_name = FN; if (file_name == "") return; - disk_manager = new DiskManager(file_name); + disk_manager = new DiskManager(file_name, true); bpm = new BufferPoolManager(100, 5, disk_manager); raw_mem = bpm->RawDataMemory(); memset(raw_mem, 0, bpm->RawDatMemorySize()); + first_blank_element_pair_id = 0; } void get_info(int &tmp, int n) { if (n > info_len) return; - n--; + n += 2; memcpy(&tmp, raw_mem + n * sizeof(int), sizeof(int)); } void write_info(int tmp, int n) { if (n > info_len) return; - n--; + n += 2; memcpy(raw_mem + n * sizeof(int), &tmp, sizeof(int)); } int write(T &t) { - frame_id_t frame_id; - BasicPageGuard guard = bpm->NewPageGuarded(&frame_id); - guard.AsMut()->data = t; - return frame_id; + size_t element_id = first_blank_element_pair_id; + size_t res_id = 0; + if (element_id != 0) { + res_id = element_id; + frame_id_t frame_id = element_id / max_element_in_page; + element_id %= max_element_in_page; + WritePageGuard guard = bpm->FetchPageWrite(frame_id); + first_blank_element_pair_id = guard.AsMut()->dat.elements[element_id].nxt_blank; + guard.AsMut()->dat.elements[element_id].data = t; + guard.AsMut()->dat.elements_count++; + } else { + frame_id_t frame_id; + BasicPageGuard guard = bpm->NewPageGuarded(&frame_id); + guard.AsMut()->dat.elements[0].data = t; + element_id = frame_id * max_element_in_page; + res_id = element_id; + if (max_element_in_page > 1) first_blank_element_pair_id = element_id + 1; + for (size_t i = 1; i < max_element_in_page - 1; i++) { + guard.AsMut()->dat.elements[i].nxt_blank = element_id + i + 1; + } + guard.AsMut()->dat.elements[max_element_in_page - 1].nxt_blank = 0; + guard.AsMut()->dat.elements_count = 1; + } + return res_id; } void update(T &t, const int index) { - WritePageGuard guard = bpm->FetchPageWrite(index); - guard.AsMut()->data = t; + size_t frame_id = index / max_element_in_page; + WritePageGuard guard = bpm->FetchPageWrite(frame_id); + guard.AsMut()->dat.elements[index % max_element_in_page].data = t; } //读出位置索引index对应的T对象的值并赋值给t,保证调用的index都是由write函数产生 void read(T &t, const int index) { - ReadPageGuard guard = bpm->FetchPageRead(index); - t = guard.As()->data; + size_t frame_id = index / max_element_in_page; + ReadPageGuard guard = bpm->FetchPageRead(frame_id); + t = guard.As()->dat.elements[index % max_element_in_page].data; } //删除位置索引index对应的对象(不涉及空间回收时,可忽略此函数),保证调用的index都是由write函数产生 - void Delete(int index) { bpm->DeletePage(index); } + void Delete(int index) { + size_t frame_id = index / max_element_in_page; + WritePageGuard guard = bpm->FetchPageWrite(frame_id); + size_t element_id = index % max_element_in_page; + guard.AsMut()->dat.elements[element_id].nxt_blank = first_blank_element_pair_id; + first_blank_element_pair_id = index; + guard.AsMut()->dat.elements_count--; + // if (guard.AsMut()->dat.elements_count == 0) { + // guard.Drop(); + // bpm->DeletePage(frame_id); + // } + } }; #endif // BPT_MEMORYRIVER_HPP \ No newline at end of file diff --git a/test/snapshot_test.cpp b/test/snapshot_test.cpp new file mode 100644 index 0000000..34df0b1 --- /dev/null +++ b/test/snapshot_test.cpp @@ -0,0 +1,93 @@ +#include +#include +#include +#include +#include +#include +#include +#include +#include +#include +#include "dataguard/dataguard.h" +#include "storage/disk_map.hpp" + +namespace SnapShotTest { +template +struct KeysContainerForGenerator { + std::deque keys_list; + std::set keys_set; + bool IsIn(const Key &key) const { return keys_set.find(key) != keys_set.end(); } + template + Key GetRandomKey(random_generator_t &rnd) { + return keys_list[rnd() % keys_list.size()]; + } + void AddKey(const Key &key) { + if (IsIn(key)) return; + keys_list.insert(std::lower_bound(keys_list.begin(), keys_list.end(), key), key); + keys_set.insert(key); + } + void RemoveKey(const Key &key) { + if (!IsIn(key)) return; + keys_list.erase(std::lower_bound(keys_list.begin(), keys_list.end(), key)); + keys_set.erase(key); + } + size_t Size() const { return keys_list.size(); } +}; +} // namespace SnapShotTest +TEST(Hello, World) { return; } + +TEST(Basic, DiskMap) { + using namespace SnapShotTest; + const unsigned int RndSeed = testing::GTEST_FLAG(random_seed); + std::mt19937 rnd(RndSeed); + KeysContainerForGenerator keys_container; + std::map std_map; + remove("/tmp/index.db"); + remove("/tmp/data.db"); + const int total_opts = 10; + { + DiskMap disk_map("index", "/tmp/index.db", "data", "/tmp/data.db"); + for (int i = 0; i < total_opts; i++) { + int opt_id = rnd() % 100; + if (opt_id <= 30) { + if (keys_container.Size() > 0 && rnd() % 5 <= 2) { + // overrite and existing key + int key = keys_container.GetRandomKey(rnd); + int val = rnd() % 1000000; + std_map[key] = val; + disk_map.Put(key, val); + } else { + // insert a new key + int key = rnd() % 1000000; + int val = rnd() % 1000000; + keys_container.AddKey(key); + std_map[key] = val; + disk_map.Put(key, val); + } + } else if (opt_id <= 60) { + if (keys_container.Size() > 0 && rnd() % 5 <= 2) { + // delete an existing key + int key = keys_container.GetRandomKey(rnd); + keys_container.RemoveKey(key); + std_map.erase(key); + disk_map.Remove(key); + } else { + // delete a non-existing key + int key = rnd() % 1000000; + keys_container.RemoveKey(key); + std_map.erase(key); + disk_map.Remove(key); + } + } else { + if (keys_container.Size() == 0) continue; + int key = keys_container.GetRandomKey(rnd); + int val = disk_map.Get(key); + if (std_map.find(key) == std_map.end()) { + ASSERT_EQ(val, -1); + } else { + ASSERT_EQ(val, std_map[key]); + } + } + } + } +} \ No newline at end of file