From c59775c18401e863499494dd6e08d37da57dda75 Mon Sep 17 00:00:00 2001 From: happyZYM Date: Sat, 4 May 2024 14:33:12 +0000 Subject: [PATCH] write create snap shot --- dataguard/include/dataguard/snapshot.h | 35 +++++++- dataguard/src/snapshot.cpp | 82 +++++++++++++++++++ storage/include/storage/disk_map.hpp | 4 + storage/include/storage/driver.h | 1 + .../include/storage/single_value_storage.hpp | 5 +- test/snapshot_test.cpp | 27 ++++++ 6 files changed, 152 insertions(+), 2 deletions(-) diff --git a/dataguard/include/dataguard/snapshot.h b/dataguard/include/dataguard/snapshot.h index 40136c3..5f6977a 100644 --- a/dataguard/include/dataguard/snapshot.h +++ b/dataguard/include/dataguard/snapshot.h @@ -5,14 +5,28 @@ #include "storage/driver.h" #include "vector.hpp" void GenerateDiff(const std::string &old_file, const std::string &new_file, const std::string &diff_file); -void ApplyPatch(const std::string &old_file, const std::string &diff_file, const std::string &new_file, bool is_reverse); +void ApplyPatch(const std::string &old_file, const std::string &diff_file, const std::string &new_file, + bool is_reverse); +/** + * @brief SnapShotManager is a class to manage the snapshot of the data drivers. + */ + +/** +The structure of the meta file is as follows: +[HEAD] +[ID1] [Anc1] +... +*/ class SnapShotManager { bool has_connected = false; + bool has_set_meta_file = false; sjtu::vector drivers; + std::string meta_file; public: // For safety and simplicity, we delete all the copy/move constructor and copy/move assignment operator. Please // manager it using smart pointer. + SnapShotManager() = default; SnapShotManager(const SnapShotManager &) = delete; SnapShotManager(SnapShotManager &&) = delete; SnapShotManager &operator=(const SnapShotManager &) = delete; @@ -28,5 +42,24 @@ class SnapShotManager { drivers = std::move(drivers_); has_connected = true; } + inline void SetMetaFile(const std::string &meta_file_) { + if (has_set_meta_file) throw std::runtime_error("SnapShotManager has already set the meta file"); + has_set_meta_file = true; + meta_file = meta_file_; + // check if the file exists + FILE *f = fopen(meta_file.c_str(), "r"); + if (f == nullptr) { + if (!has_connected) + throw std::runtime_error( + "SnapShotManager has not connected to the data drivers before initializing the repository"); + InitializeRepository(); + } else { + fclose(f); + } + } + void InitializeRepository(); + void CreateSnapShot(const std::string &snap_shot_ID); + void SwitchToSnapShot(const std::string &snap_shot_ID); + void RemoveSnapShot(const std::string &snap_shot_ID); }; #endif // SNAP_SHOT_H \ No newline at end of file diff --git a/dataguard/src/snapshot.cpp b/dataguard/src/snapshot.cpp index e5ca522..ae87392 100644 --- a/dataguard/src/snapshot.cpp +++ b/dataguard/src/snapshot.cpp @@ -2,6 +2,8 @@ #include #include #include +#include +#include "map.hpp" #include "storage/config.h" #include "vector.hpp" @@ -13,6 +15,21 @@ default_numeric_index_t GetFileSize(const std::string &file) { } return stat_buf.st_size; } + +void CopyFile(const std::string &src, const std::string &dst) { + FILE *f1 = fopen(src.c_str(), "rb"); + FILE *f2 = fopen(dst.c_str(), "wb"); + if (f1 == nullptr || f2 == nullptr) { + throw std::runtime_error("fopen failed"); + } + uint8_t buf[1 << 12]; + size_t read_size; + while ((read_size = fread(buf, 1, 1 << 12, f1)) > 0) { + fwrite(buf, 1, read_size, f2); + } + fclose(f1); + fclose(f2); +} struct uint8_t_reader { FILE *f; uint8_t *buf, *p1, *p2; @@ -138,6 +155,7 @@ void GenerateDiff(const std::string &old_file, const std::string &new_file, cons buf.push_back(new_reader()); } } + if (buf.size() == 0) buf.push_back(3); // Step 2 size_t compressed_size_bound = ZSTD_compressBound(buf.size()); uint8_t *compressed_buf = new uint8_t[compressed_size_bound]; @@ -181,6 +199,7 @@ void ApplyPatch(const std::string &old_file, const std::string &diff_file, const while (diff_buf_cnt < decompressed_size) { uint8_t flag; flag = decompressed_buf[diff_buf_cnt++]; + if (flag == 3) goto just_copy; if (flag == 0) { default_numeric_index_t current_diff_len = 0, current_diff_pos = 0, tmp = 0; tmp = decompressed_buf[diff_buf_cnt++]; @@ -266,6 +285,7 @@ void ApplyPatch(const std::string &old_file, const std::string &diff_file, const } } } +just_copy:; if (reader_cursor < old_file_size) { while (reader_cursor < old_file_size) { writer(reader()); @@ -277,4 +297,66 @@ ed:; fclose(fp); fclose(fp2); delete[] decompressed_buf; +} + +void SnapShotManager::InitializeRepository() { + FILE *f = fopen(meta_file.c_str(), "w"); + if (f == nullptr) { + throw std::runtime_error("fopen failed"); + } + fprintf(f, "INIT\n"); + fclose(f); + for (size_t i = 0; i < drivers.size(); i++) { + sjtu::vector files = drivers[i]->ListFiles(); + for (size_t j = 0; j < files.size(); j++) { + std::string frontier_file = files[j].path + ".frontier"; + // generate an empty file + FILE *f = fopen(frontier_file.c_str(), "w"); + if (f == nullptr) { + throw std::runtime_error("fopen failed"); + } + fclose(f); + } + } +} + +void SnapShotManager::CreateSnapShot(const std::string &snap_shot_ID) { + if (!has_set_meta_file) { + throw std::runtime_error("SnapShotManager has not set the meta file"); + } + if (!has_connected) { + throw std::runtime_error("SnapShotManager has not connected to the data drivers"); + } + std::fstream fs(meta_file, std::ios::in | std::ios::out); + std::string HEAD; + fs >> HEAD; + sjtu::vector> snapshot_relationship; + std::string cur, anc; + sjtu::map> son_list; + while (fs >> cur >> anc) { + snapshot_relationship.push_back({cur, anc}); + son_list[anc].push_back(cur); + } + if (son_list.find(snap_shot_ID) != son_list.end()) { + throw std::runtime_error("Snapshot already exists"); + } + // TODO + fs.close(); + fs.open(meta_file, std::ios::in | std::ios::out); + fs << snap_shot_ID << '\n'; + for (size_t i = 0; i < snapshot_relationship.size(); i++) { + fs << snapshot_relationship[i].first << ' ' << snapshot_relationship[i].second << '\n'; + } + fs << snap_shot_ID << " " << HEAD << std::endl; + for (size_t i = 0; i < drivers.size(); i++) { + drivers[i]->Flush(); + sjtu::vector files = drivers[i]->ListFiles(); + for (size_t j = 0; j < files.size(); j++) { + std::string frontier_file = files[j].path + ".frontier"; + std::string diff_file = files[j].path + "." + snap_shot_ID + ".diff"; + GenerateDiff(frontier_file, files[j].path, diff_file); + // then overwrite the frontier file + CopyFile(files[j].path, frontier_file); + } + } } \ No newline at end of file diff --git a/storage/include/storage/disk_map.hpp b/storage/include/storage/disk_map.hpp index 358210a..c18f07b 100644 --- a/storage/include/storage/disk_map.hpp +++ b/storage/include/storage/disk_map.hpp @@ -78,5 +78,9 @@ class DiskMap : public DataDriverBase { indexer->Put(key, data_id); return true; } + void Flush() { + indexer->Flush(); + data_storage->Flush(); + } }; #endif // DISK_MAP_H \ No newline at end of file diff --git a/storage/include/storage/driver.h b/storage/include/storage/driver.h index 5e2caab..58d8047 100644 --- a/storage/include/storage/driver.h +++ b/storage/include/storage/driver.h @@ -13,5 +13,6 @@ class DataDriverBase { DataDriverBase() = default; virtual ~DataDriverBase() = default; virtual sjtu::vector ListFiles() = 0; + virtual void Flush() = 0; }; #endif // DRIVER_H \ No newline at end of file diff --git a/storage/include/storage/single_value_storage.hpp b/storage/include/storage/single_value_storage.hpp index 77d6c0c..523e155 100644 --- a/storage/include/storage/single_value_storage.hpp +++ b/storage/include/storage/single_value_storage.hpp @@ -47,7 +47,10 @@ class SingleValueStorage { ~SingleValueStorage() { if (bpm != nullptr) CloseFile(); } - + void Flush() { + memcpy(raw_mem, &first_blank_element_pair_id, sizeof(size_t)); + bpm->FlushAllPages(); + } void get_info(int &tmp, int n) { if (n > info_len) return; n += 2; diff --git a/test/snapshot_test.cpp b/test/snapshot_test.cpp index d5cb608..0f834cb 100644 --- a/test/snapshot_test.cpp +++ b/test/snapshot_test.cpp @@ -128,4 +128,31 @@ TEST(Basic, T1) { GenerateDiff("/tmp/1.dat", "/tmp/2.dat", "/tmp/diff.dat"); ApplyPatch("/tmp/1.dat", "/tmp/diff.dat", "/tmp/3.dat", false); ApplyPatch("/tmp/2.dat", "/tmp/diff.dat", "/tmp/4.dat", true); +} + +TEST(Basic, T2) { + remove("/tmp/T2/index.db"); + remove("/tmp/T2/data.db"); + remove("/tmp/T2/meta.dat"); + { + DiskMap disk_map("index", "/tmp/T2/index.db", "data", "/tmp/T2/data.db"); + SnapShotManager snap_shot_manager; + sjtu::vector drivers; + drivers.push_back(&disk_map); + snap_shot_manager.Connect(drivers); + snap_shot_manager.SetMetaFile("/tmp/T2/meta.dat"); + for (int i = 0; i < 100000; i++) disk_map.Put(i, i); + snap_shot_manager.CreateSnapShot("snap1"); + } + { + DiskMap disk_map("index", "/tmp/T2/index.db", "data", "/tmp/T2/data.db"); + SnapShotManager snap_shot_manager; + sjtu::vector drivers; + drivers.push_back(&disk_map); + snap_shot_manager.Connect(drivers); + snap_shot_manager.SetMetaFile("/tmp/T2/meta.dat"); + for (int i = 0; i < 100; i += 10) disk_map.Put(i + 3, i); + snap_shot_manager.CreateSnapShot("snap2"); + snap_shot_manager.CreateSnapShot("snap3"); + } } \ No newline at end of file