/*
 * Patch overview (multi-file unified diff). This leading note sits before the
 * first file header, where patch tools skip non-diff text.
 *
 * Files touched, as visible in the hunks below:
 *   - dataguard/CMakeLists.txt: links the dataguard library against
 *     spdlog::spdlog in addition to libzstd_static and storage.
 *   - dataguard/include/dataguard/snapshot.h: declares CopyFile; adds to
 *     SnapShotManager the logger_ptr member, the WayEntry record
 *     (snap_ID, diff_ID, is_reverse), the private helpers FindWay and
 *     ApplyLongChange, the public SetLogger, and the CheckOutFrontier
 *     declaration.
 *   - dataguard/src/snapshot.cpp: CreateSnapShot now also throws
 *     "Snapshot already exists" when the ID is a key of get_anc (i.e. it was
 *     recorded as a child), not only when it is a key of son_list; adds
 *     CheckOutFrontier and SwitchToSnapShot.
 *   - storage/include/storage/driver.h, storage/include/storage/disk_map.hpp:
 *     new pure-virtual LockDownForCheckOut; the DiskMap override deletes its
 *     six indexer / buffer-pool / disk-manager members and sets them to nullptr.
 *   - test/snapshot_test.cpp: T2 registers a stderr logger, creates /tmp/T2,
 *     and switches between INIT, snap1, snap2 and snap3, calling
 *     CheckOutFrontier after each switch and checking values with Get.
 *
 * Behaviour of the added functions:
 *   - FindWay(dest): throws std::runtime_error if the meta file is not set,
 *     the manager is not connected, or dest is a key of neither son_list nor
 *     get_anc. Reads HEAD and then "cur anc" pairs from the meta file, returns
 *     an empty vector when HEAD == dest, otherwise runs a breadth-first search
 *     from dest over parent and child edges until HEAD is recorded, then walks
 *     visit_record from HEAD towards dest, appending one WayEntry per step.
 *   - ApplyLongChange(old_file, new_file, way, file_name_base): copies
 *     old_file to new_file, then for each entry calls ApplyPatch with
 *     file_name_base + "." + diff_ID + ".diff", writing new_file + ".tmp" and
 *     renaming it over new_file; logs before and after when logger_ptr is set.
 *   - CheckOutFrontier(): same two precondition checks; reads HEAD; for every
 *     driver calls Flush and LockDownForCheckOut, then for each listed file
 *     either removes it (HEAD == "INIT") or copies path + ".frontier" over it.
 *   - SwitchToSnapShot(id): same precondition checks; calls FindWay(id),
 *     rebuilds every driver file's ".frontier" through ApplyLongChange, then
 *     rewrites the meta file with id on the first line followed by the
 *     previously read relationship pairs.
 *
 * NOTE(review): items to confirm before merging.
 *   - SwitchToSnapShot reopens the meta file with in|out and no truncation. If
 *     the new first line is shorter than the old HEAD line, trailing bytes of
 *     the old content presumably remain at the end of the file; consider
 *     opening with out|trunc.
 *   - FindWay: if the search finishes without recording HEAD,
 *     visit_record[tmp] default-inserts an entry with an empty snap_ID and the
 *     walk-back loop looks like it cannot terminate. Confirm the metadata
 *     guarantees that HEAD and dest are connected, or add a guard.
 *   - The results of remove() and rename() are not checked in ApplyLongChange
 *     or SwitchToSnapShot.
 *   - std::move(FindWay(...)) wraps a temporary; the move looks redundant.
 *   - CheckOutFrontier calls ListFiles() after LockDownForCheckOut(). The
 *     DiskMap::ListFiles context line pushes data_disk_manager, which is
 *     nullptr at that point; verify no consumer dereferences it.
 *   - The friend declaration of ApplyLongChange takes three parameters while
 *     the member defined below takes four; the friend presumably names a
 *     different free function. Verify it is intended.
 *   - #include targets and template arguments are missing in this copy of the
 *     patch (bare "#include", "sjtu::vector drivers"), presumably lost in
 *     extraction; restore them from the repository before applying.
 */
diff --git a/dataguard/CMakeLists.txt b/dataguard/CMakeLists.txt index 223b3b4..34f875d 100644 --- a/dataguard/CMakeLists.txt +++ b/dataguard/CMakeLists.txt @@ -1,2 +1,2 @@ add_library(dataguard STATIC src/snapshot.cpp src/txn_logger.cpp) -target_link_libraries(dataguard libzstd_static storage) \ No newline at end of file +target_link_libraries(dataguard libzstd_static storage spdlog::spdlog) \ No newline at end of file diff --git a/dataguard/include/dataguard/snapshot.h b/dataguard/include/dataguard/snapshot.h index 5f6977a..384089b 100644 --- a/dataguard/include/dataguard/snapshot.h +++ b/dataguard/include/dataguard/snapshot.h @@ -1,12 +1,19 @@ #ifndef SNAP_SHOT_H #define SNAP_SHOT_H +#include +#include +#include +#include +#include #include +#include "list.hpp" #include "map.hpp" #include "storage/driver.h" #include "vector.hpp" void GenerateDiff(const std::string &old_file, const std::string &new_file, const std::string &diff_file); void ApplyPatch(const std::string &old_file, const std::string &diff_file, const std::string &new_file, bool is_reverse); +void CopyFile(const std::string &src, const std::string &dst); /** * @brief SnapShotManager is a class to manage the snapshot of the data drivers. 
*/ @@ -22,6 +29,87 @@ class SnapShotManager { bool has_set_meta_file = false; sjtu::vector drivers; std::string meta_file; + std::shared_ptr logger_ptr; + struct WayEntry { + std::string snap_ID; + std::string diff_ID; + bool is_reverse; + }; + friend void ApplyLongChange(const std::string &old_file, const std::string &new_file, + const sjtu::vector &way); + inline sjtu::vector FindWay(std::string dest) { + if (!has_set_meta_file) { + throw std::runtime_error("SnapShotManager has not set the meta file"); + } + if (!has_connected) { + throw std::runtime_error("SnapShotManager has not connected to the data drivers"); + } + std::fstream fs(meta_file, std::ios::in); + std::string HEAD; + fs >> HEAD; + std::string cur, anc; + sjtu::map> son_list; + sjtu::map get_anc; + while (fs >> cur >> anc) { + son_list[anc].push_back(cur); + get_anc[cur] = anc; + } + if (son_list.find(dest) == son_list.end() && get_anc.find(dest) == get_anc.end()) { + throw std::runtime_error("unable to find destination"); + } + sjtu::vector res; + if (HEAD == dest) return res; + sjtu::list Q; + sjtu::map visit_record; + Q.push_back(dest); + while (!Q.empty()) { + std::string cur = Q.front(); + Q.pop_front(); + if (get_anc.find(cur) != get_anc.end()) { + std::string v = get_anc[cur]; + if (visit_record.find(v) == visit_record.end()) { + visit_record[v] = {cur, cur, false}; + if (v == HEAD) goto ed; + Q.push_back(v); + } + } + if (son_list.find(cur) != son_list.end()) { + auto &s_l = son_list[cur]; + for (size_t j = 0; j < s_l.size(); j++) { + std::string v = s_l[j]; + if (visit_record.find(v) == visit_record.end()) { + visit_record[v] = {cur, v, true}; + if (v == HEAD) goto ed; + Q.push_back(v); + } + } + } + } + ed:; + std::string tmp = HEAD; + while (tmp != dest) { + res.push_back(visit_record[tmp]); + tmp = visit_record[tmp].snap_ID; + } + return res; + } + inline void ApplyLongChange(const std::string &old_file, const std::string &new_file, + const sjtu::vector &way, const std::string 
&file_name_base) { + CopyFile(old_file, new_file); + for (size_t i = 0; i < way.size(); i++) { + if (logger_ptr) { + logger_ptr->info("Applying diff {} to {} with inverse mark {}", file_name_base + "." + way[i].diff_ID + ".diff", + new_file, way[i].is_reverse); + } + ApplyPatch(new_file, file_name_base + "." + way[i].diff_ID + ".diff", new_file + ".tmp", way[i].is_reverse); + remove(new_file.c_str()); + rename((new_file + ".tmp").c_str(), new_file.c_str()); + if (logger_ptr) { + logger_ptr->info("Applied diff {} to {} with inverse mark {}", file_name_base + "." + way[i].diff_ID + ".diff", + new_file, way[i].is_reverse); + } + } + } public: // For safety and simplicity, we delete all the copy/move constructor and copy/move assignment operator. Please @@ -42,6 +130,7 @@ class SnapShotManager { drivers = std::move(drivers_); has_connected = true; } + inline void SetLogger(const std::shared_ptr &logger_ptr_) { logger_ptr = logger_ptr_; } inline void SetMetaFile(const std::string &meta_file_) { if (has_set_meta_file) throw std::runtime_error("SnapShotManager has already set the meta file"); has_set_meta_file = true; @@ -59,6 +148,7 @@ class SnapShotManager { } void InitializeRepository(); void CreateSnapShot(const std::string &snap_shot_ID); + void CheckOutFrontier(); void SwitchToSnapShot(const std::string &snap_shot_ID); void RemoveSnapShot(const std::string &snap_shot_ID); }; diff --git a/dataguard/src/snapshot.cpp b/dataguard/src/snapshot.cpp index ae87392..465173b 100644 --- a/dataguard/src/snapshot.cpp +++ b/dataguard/src/snapshot.cpp @@ -333,14 +333,15 @@ void SnapShotManager::CreateSnapShot(const std::string &snap_shot_ID) { sjtu::vector> snapshot_relationship; std::string cur, anc; sjtu::map> son_list; + sjtu::map get_anc; while (fs >> cur >> anc) { snapshot_relationship.push_back({cur, anc}); son_list[anc].push_back(cur); + get_anc[cur] = anc; } - if (son_list.find(snap_shot_ID) != son_list.end()) { + if (son_list.find(snap_shot_ID) != son_list.end() || 
get_anc.find(snap_shot_ID) != get_anc.end()) { throw std::runtime_error("Snapshot already exists"); } - // TODO fs.close(); fs.open(meta_file, std::ios::in | std::ios::out); fs << snap_shot_ID << '\n'; @@ -359,4 +360,82 @@ void SnapShotManager::CreateSnapShot(const std::string &snap_shot_ID) { CopyFile(files[j].path, frontier_file); } } +} + +void SnapShotManager::CheckOutFrontier() { + if (!has_set_meta_file) { + throw std::runtime_error("SnapShotManager has not set the meta file"); + } + if (!has_connected) { + throw std::runtime_error("SnapShotManager has not connected to the data drivers"); + } + if (logger_ptr) { + logger_ptr->info("Checking out frontier"); + } + std::fstream fs(meta_file, std::ios::in); + std::string HEAD; + fs >> HEAD; + for (size_t i = 0; i < drivers.size(); i++) { + drivers[i]->Flush(); + drivers[i]->LockDownForCheckOut(); + if (logger_ptr) { + logger_ptr->info("flushed and locked down driver {}", i); + } + sjtu::vector files = drivers[i]->ListFiles(); + for (size_t j = 0; j < files.size(); j++) { + if (HEAD == "INIT") { + remove(files[j].path.c_str()); + continue; + } + std::string frontier_file = files[j].path + ".frontier"; + // then overwrite the frontier file + CopyFile(frontier_file, files[j].path); + } + } +} + +void SnapShotManager::SwitchToSnapShot(const std::string &snap_shot_ID) { + if (!has_set_meta_file) { + throw std::runtime_error("SnapShotManager has not set the meta file"); + } + if (!has_connected) { + throw std::runtime_error("SnapShotManager has not connected to the data drivers"); + } + if (logger_ptr) { + logger_ptr->info("Try switching to snapshot {}", snap_shot_ID); + } + sjtu::vector way = std::move(FindWay(snap_shot_ID)); + if (logger_ptr) { + logger_ptr->info("Successfully found the way"); + } + for (size_t i = 0; i < drivers.size(); i++) { + drivers[i]->Flush(); + sjtu::vector files = drivers[i]->ListFiles(); + for (size_t j = 0; j < files.size(); j++) { + std::string frontier_file = files[j].path + ".frontier"; 
+ if (logger_ptr) { + logger_ptr->info("applying changes to {}", frontier_file); + } + ApplyLongChange(frontier_file, frontier_file + ".tmp", way, files[j].path); + remove(frontier_file.c_str()); + rename((frontier_file + ".tmp").c_str(), frontier_file.c_str()); + if (logger_ptr) { + logger_ptr->info("successfully applied changes to {}", frontier_file); + } + } + } + std::fstream fs(meta_file, std::ios::in | std::ios::out); + std::string HEAD; + fs >> HEAD; + sjtu::vector> snapshot_relationship; + std::string cur, anc; + while (fs >> cur >> anc) { + snapshot_relationship.push_back({cur, anc}); + } + fs.close(); + fs.open(meta_file, std::ios::in | std::ios::out); + fs << snap_shot_ID << '\n'; + for (size_t i = 0; i < snapshot_relationship.size(); i++) { + fs << snapshot_relationship[i].first << ' ' << snapshot_relationship[i].second << '\n'; + } } \ No newline at end of file diff --git a/storage/include/storage/disk_map.hpp b/storage/include/storage/disk_map.hpp index c18f07b..8b2b3d2 100644 --- a/storage/include/storage/disk_map.hpp +++ b/storage/include/storage/disk_map.hpp @@ -53,6 +53,20 @@ class DiskMap : public DataDriverBase { res.push_back({data_file_identifier, data_file_path, data_disk_manager}); return res; } + void LockDownForCheckOut() override { + delete indexer; + delete index_bpm; + delete index_disk_manager; + delete data_storage; + delete data_bpm; + delete data_disk_manager; + indexer = nullptr; + index_bpm = nullptr; + index_disk_manager = nullptr; + data_storage = nullptr; + data_bpm = nullptr; + data_disk_manager = nullptr; + } Value Get(const Key &key) { size_t data_id; if ((data_id = indexer->Get(key)) == kInvalidValueIndex) throw std::runtime_error("Key not found"); diff --git a/storage/include/storage/driver.h b/storage/include/storage/driver.h index 58d8047..495c24e 100644 --- a/storage/include/storage/driver.h +++ b/storage/include/storage/driver.h @@ -14,5 +14,6 @@ class DataDriverBase { virtual ~DataDriverBase() = default; virtual 
sjtu::vector ListFiles() = 0; virtual void Flush() = 0; + virtual void LockDownForCheckOut() = 0; }; #endif // DRIVER_H \ No newline at end of file diff --git a/test/snapshot_test.cpp b/test/snapshot_test.cpp index 0f834cb..ed05b4e 100644 --- a/test/snapshot_test.cpp +++ b/test/snapshot_test.cpp @@ -131,6 +131,8 @@ TEST(Basic, T1) { } TEST(Basic, T2) { + std::shared_ptr logger_ptr = spdlog::stderr_color_mt("stderr_logger"); + mkdir("/tmp/T2", 0700); remove("/tmp/T2/index.db"); remove("/tmp/T2/data.db"); remove("/tmp/T2/meta.dat"); @@ -140,6 +142,7 @@ TEST(Basic, T2) { sjtu::vector drivers; drivers.push_back(&disk_map); snap_shot_manager.Connect(drivers); + snap_shot_manager.SetLogger(logger_ptr); snap_shot_manager.SetMetaFile("/tmp/T2/meta.dat"); for (int i = 0; i < 100000; i++) disk_map.Put(i, i); snap_shot_manager.CreateSnapShot("snap1"); @@ -150,9 +153,89 @@ TEST(Basic, T2) { sjtu::vector drivers; drivers.push_back(&disk_map); snap_shot_manager.Connect(drivers); + snap_shot_manager.SetLogger(logger_ptr); snap_shot_manager.SetMetaFile("/tmp/T2/meta.dat"); - for (int i = 0; i < 100; i += 10) disk_map.Put(i + 3, i); + for (int i = 0; i < 100; i += 10) { + int tmp = i + 3; + disk_map.Put(i, tmp); + } snap_shot_manager.CreateSnapShot("snap2"); + snap_shot_manager.SwitchToSnapShot("INIT"); + snap_shot_manager.CheckOutFrontier(); + } + { + DiskMap disk_map("index", "/tmp/T2/index.db", "data", "/tmp/T2/data.db"); + SnapShotManager snap_shot_manager; + sjtu::vector drivers; + drivers.push_back(&disk_map); + snap_shot_manager.Connect(drivers); + snap_shot_manager.SetLogger(logger_ptr); + snap_shot_manager.SetMetaFile("/tmp/T2/meta.dat"); + snap_shot_manager.SwitchToSnapShot("snap1"); + snap_shot_manager.CheckOutFrontier(); + } + { + DiskMap disk_map("index", "/tmp/T2/index.db", "data", "/tmp/T2/data.db"); + SnapShotManager snap_shot_manager; + sjtu::vector drivers; + drivers.push_back(&disk_map); + snap_shot_manager.Connect(drivers); + 
snap_shot_manager.SetLogger(logger_ptr); + snap_shot_manager.SetMetaFile("/tmp/T2/meta.dat"); + for (int i = 0; i < 100000; i++) EXPECT_EQ(disk_map.Get(i), i); + snap_shot_manager.SwitchToSnapShot("snap2"); + snap_shot_manager.CheckOutFrontier(); + } + { + DiskMap disk_map("index", "/tmp/T2/index.db", "data", "/tmp/T2/data.db"); + SnapShotManager snap_shot_manager; + sjtu::vector drivers; + drivers.push_back(&disk_map); + snap_shot_manager.Connect(drivers); + snap_shot_manager.SetLogger(logger_ptr); + snap_shot_manager.SetMetaFile("/tmp/T2/meta.dat"); + for (int i = 0; i < 100; i += 10) EXPECT_EQ(disk_map.Get(i), i + 3); + snap_shot_manager.SwitchToSnapShot("INIT"); + snap_shot_manager.CheckOutFrontier(); + } + { + DiskMap disk_map("index", "/tmp/T2/index.db", "data", "/tmp/T2/data.db"); + SnapShotManager snap_shot_manager; + sjtu::vector drivers; + drivers.push_back(&disk_map); + snap_shot_manager.Connect(drivers); + snap_shot_manager.SetLogger(logger_ptr); + snap_shot_manager.SetMetaFile("/tmp/T2/meta.dat"); + for (int i = 0; i < 100; i += 10) { + int tmp = i + 4; + disk_map.Put(i, tmp); + } snap_shot_manager.CreateSnapShot("snap3"); + snap_shot_manager.SwitchToSnapShot("snap2"); + snap_shot_manager.CheckOutFrontier(); + } + { + DiskMap disk_map("index", "/tmp/T2/index.db", "data", "/tmp/T2/data.db"); + SnapShotManager snap_shot_manager; + sjtu::vector drivers; + drivers.push_back(&disk_map); + snap_shot_manager.Connect(drivers); + snap_shot_manager.SetLogger(logger_ptr); + snap_shot_manager.SetMetaFile("/tmp/T2/meta.dat"); + for (int i = 0; i < 100; i += 10) EXPECT_EQ(disk_map.Get(i), i + 3); + snap_shot_manager.SwitchToSnapShot("snap3"); + snap_shot_manager.CheckOutFrontier(); + } + { + DiskMap disk_map("index", "/tmp/T2/index.db", "data", "/tmp/T2/data.db"); + SnapShotManager snap_shot_manager; + sjtu::vector drivers; + drivers.push_back(&disk_map); + snap_shot_manager.Connect(drivers); + snap_shot_manager.SetLogger(logger_ptr); + 
snap_shot_manager.SetMetaFile("/tmp/T2/meta.dat"); + for (int i = 0; i < 100; i += 10) EXPECT_EQ(disk_map.Get(i), i + 4); + snap_shot_manager.SwitchToSnapShot("INIT"); + snap_shot_manager.CheckOutFrontier(); } } \ No newline at end of file