write create snap shot

This commit is contained in:
2024-05-04 14:33:12 +00:00
parent 6af4d88535
commit c59775c184
6 changed files with 152 additions and 2 deletions

View File

@ -5,14 +5,28 @@
#include "storage/driver.h"
#include "vector.hpp"
void GenerateDiff(const std::string &old_file, const std::string &new_file, const std::string &diff_file);
void ApplyPatch(const std::string &old_file, const std::string &diff_file, const std::string &new_file, bool is_reverse);
void ApplyPatch(const std::string &old_file, const std::string &diff_file, const std::string &new_file,
bool is_reverse);
/**
* @brief SnapShotManager is a class to manage the snapshot of the data drivers.
*/
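/**
The structure of the meta file is as follows:
[HEAD]          -- the ID of the current snapshot ("INIT" right after the repository is initialized)
[ID1] [Anc1]    -- one line per snapshot: the snapshot ID followed by the ID of its ancestor
...
*/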
class SnapShotManager {
bool has_connected = false;
bool has_set_meta_file = false;
sjtu::vector<DataDriverBase *> drivers;
std::string meta_file;
public:
// For safety and simplicity, all copy/move constructors and copy/move assignment operators are deleted. Please
// manage it using a smart pointer.
SnapShotManager() = default;
SnapShotManager(const SnapShotManager &) = delete;
SnapShotManager(SnapShotManager &&) = delete;
SnapShotManager &operator=(const SnapShotManager &) = delete;
@ -28,5 +42,24 @@ class SnapShotManager {
drivers = std::move(drivers_);
has_connected = true;
}
/**
 * @brief Bind the manager to its meta file; initialize the repository if the file does not exist yet.
 *
 * The "meta file has been set" state is only committed once every check (and, when needed, the repository
 * initialization) has succeeded, so a failed call can be retried, e.g. after calling Connect().
 *
 * @param meta_file_ path of the meta file.
 * @throws std::runtime_error if a meta file has already been set, or if the file does not exist and the manager
 *         has not been connected to the data drivers. Errors from InitializeRepository() propagate as well.
 */
inline void SetMetaFile(const std::string &meta_file_) {
  if (has_set_meta_file) throw std::runtime_error("SnapShotManager has already set the meta file");
  // check if the file exists
  FILE *f = fopen(meta_file_.c_str(), "r");
  if (f != nullptr) {
    fclose(f);
    meta_file = meta_file_;
  } else {
    if (!has_connected)
      throw std::runtime_error(
          "SnapShotManager has not connected to the data drivers before initializing the repository");
    // InitializeRepository() reads the member, so it has to be assigned before the call.
    meta_file = meta_file_;
    InitializeRepository();
  }
  has_set_meta_file = true;
}
void InitializeRepository();
void CreateSnapShot(const std::string &snap_shot_ID);
void SwitchToSnapShot(const std::string &snap_shot_ID);
void RemoveSnapShot(const std::string &snap_shot_ID);
};
#endif // SNAP_SHOT_H

View File

@ -2,6 +2,8 @@
#include <sys/stat.h>
#include <zstd.h>
#include <cstdint>
#include <fstream>
#include "map.hpp"
#include "storage/config.h"
#include "vector.hpp"
@ -13,6 +15,21 @@ default_numeric_index_t GetFileSize(const std::string &file) {
}
return stat_buf.st_size;
}
/**
 * @brief Copy the whole content of `src` into `dst`, creating `dst` or truncating it if it already exists.
 *
 * @param src path of the file to read.
 * @param dst path of the file to write.
 * @throws std::runtime_error if either file cannot be opened, or if reading, writing or closing fails.
 */
void CopyFile(const std::string &src, const std::string &dst) {
  // Open the source first: a missing source must not create or truncate the destination.
  FILE *in = fopen(src.c_str(), "rb");
  if (in == nullptr) {
    throw std::runtime_error("fopen failed");
  }
  FILE *out = fopen(dst.c_str(), "wb");
  if (out == nullptr) {
    fclose(in);  // do not leak the handle that was opened successfully
    throw std::runtime_error("fopen failed");
  }
  uint8_t buf[1 << 12];
  size_t read_size;
  bool ok = true;
  while ((read_size = fread(buf, 1, sizeof(buf), in)) > 0) {
    if (fwrite(buf, 1, read_size, out) != read_size) {
      ok = false;
      break;
    }
  }
  // fread returns 0 both at end-of-file and on error; tell them apart.
  if (ferror(in)) ok = false;
  fclose(in);
  // Buffered data is written out on close, so a failing fclose means the copy is incomplete.
  if (fclose(out) != 0) ok = false;
  if (!ok) {
    throw std::runtime_error("CopyFile failed");
  }
}
struct uint8_t_reader {
FILE *f;
uint8_t *buf, *p1, *p2;
@ -138,6 +155,7 @@ void GenerateDiff(const std::string &old_file, const std::string &new_file, cons
buf.push_back(new_reader());
}
}
if (buf.size() == 0) buf.push_back(3);
// Step 2
size_t compressed_size_bound = ZSTD_compressBound(buf.size());
uint8_t *compressed_buf = new uint8_t[compressed_size_bound];
@ -181,6 +199,7 @@ void ApplyPatch(const std::string &old_file, const std::string &diff_file, const
while (diff_buf_cnt < decompressed_size) {
uint8_t flag;
flag = decompressed_buf[diff_buf_cnt++];
if (flag == 3) goto just_copy;
if (flag == 0) {
default_numeric_index_t current_diff_len = 0, current_diff_pos = 0, tmp = 0;
tmp = decompressed_buf[diff_buf_cnt++];
@ -266,6 +285,7 @@ void ApplyPatch(const std::string &old_file, const std::string &diff_file, const
}
}
}
just_copy:;
if (reader_cursor < old_file_size) {
while (reader_cursor < old_file_size) {
writer(reader());
@ -278,3 +298,65 @@ ed:;
fclose(fp2);
delete[] decompressed_buf;
}
void SnapShotManager::InitializeRepository() {
FILE *f = fopen(meta_file.c_str(), "w");
if (f == nullptr) {
throw std::runtime_error("fopen failed");
}
fprintf(f, "INIT\n");
fclose(f);
for (size_t i = 0; i < drivers.size(); i++) {
sjtu::vector<DataDriverBase::FileEntry> files = drivers[i]->ListFiles();
for (size_t j = 0; j < files.size(); j++) {
std::string frontier_file = files[j].path + ".frontier";
// generate an empty file
FILE *f = fopen(frontier_file.c_str(), "w");
if (f == nullptr) {
throw std::runtime_error("fopen failed");
}
fclose(f);
}
}
}
/**
 * @brief Create a new snapshot named `snap_shot_ID` whose ancestor is the current HEAD.
 *
 * The meta file is rewritten with `snap_shot_ID` as the new HEAD and an extra "<ID> <old HEAD>" line. Then, for
 * every file of every connected driver (after flushing the driver), a diff from "<path>.frontier" to the file is
 * stored in "<path>.<ID>.diff" and the frontier file is overwritten with the current file content.
 *
 * @param snap_shot_ID ID of the new snapshot; must be non-empty, free of whitespace and not used yet.
 * @throws std::runtime_error if the meta file has not been set, the drivers are not connected, the ID is invalid
 *         or already in use, or the meta file cannot be read or written.
 */
void SnapShotManager::CreateSnapShot(const std::string &snap_shot_ID) {
  if (!has_set_meta_file) {
    throw std::runtime_error("SnapShotManager has not set the meta file");
  }
  if (!has_connected) {
    throw std::runtime_error("SnapShotManager has not connected to the data drivers");
  }
  // IDs are stored as whitespace-delimited tokens, so an empty ID or one containing whitespace would corrupt
  // the meta file.
  if (snap_shot_ID.empty() || snap_shot_ID.find_first_of(" \t\r\n") != std::string::npos) {
    throw std::runtime_error("Invalid snapshot ID");
  }
  std::fstream fs(meta_file, std::ios::in);
  if (!fs.is_open()) {
    throw std::runtime_error("Failed to open the meta file");
  }
  std::string HEAD;
  if (!(fs >> HEAD)) {
    throw std::runtime_error("The meta file is corrupted");
  }
  sjtu::vector<std::pair<std::string, std::string>> snapshot_relationship;
  std::string cur, anc;
  sjtu::map<std::string, sjtu::vector<std::string>> son_list;
  // son_list is keyed by ancestor only, so a snapshot without descendants has to be detected through its own ID.
  bool already_exists = (snap_shot_ID == HEAD);
  while (fs >> cur >> anc) {
    if (cur == snap_shot_ID) already_exists = true;
    snapshot_relationship.push_back({cur, anc});
    son_list[anc].push_back(cur);
  }
  if (already_exists || son_list.find(snap_shot_ID) != son_list.end()) {
    throw std::runtime_error("Snapshot already exists");
  }
  // TODO
  fs.close();
  fs.clear();
  // Truncate on reopen so the rewritten content never depends on the length of the previous content.
  fs.open(meta_file, std::ios::out | std::ios::trunc);
  if (!fs.is_open()) {
    throw std::runtime_error("Failed to open the meta file");
  }
  fs << snap_shot_ID << '\n';
  for (size_t i = 0; i < snapshot_relationship.size(); i++) {
    fs << snapshot_relationship[i].first << ' ' << snapshot_relationship[i].second << '\n';
  }
  fs << snap_shot_ID << " " << HEAD << '\n';
  fs.close();
  if (fs.fail()) {
    throw std::runtime_error("Failed to write the meta file");
  }
  for (size_t i = 0; i < drivers.size(); i++) {
    // Make sure the on-disk files reflect the in-memory state before diffing them.
    drivers[i]->Flush();
    sjtu::vector<DataDriverBase::FileEntry> files = drivers[i]->ListFiles();
    for (size_t j = 0; j < files.size(); j++) {
      std::string frontier_file = files[j].path + ".frontier";
      std::string diff_file = files[j].path + "." + snap_shot_ID + ".diff";
      GenerateDiff(frontier_file, files[j].path, diff_file);
      // then overwrite the frontier file
      CopyFile(files[j].path, frontier_file);
    }
  }
}

View File

@ -78,5 +78,9 @@ class DiskMap : public DataDriverBase {
indexer->Put(key, data_id);
return true;
}
// Flushes both parts of the map: the indexer first, then the data storage.
// NOTE(review): DataDriverBase declares a pure virtual Flush(), which this presumably implements; consider
// marking it `override` once the other overriding members of this class are confirmed to use it as well.
void Flush() {
indexer->Flush();
data_storage->Flush();
}
};
#endif // DISK_MAP_H

View File

@ -13,5 +13,6 @@ class DataDriverBase {
DataDriverBase() = default;
virtual ~DataDriverBase() = default;
virtual sjtu::vector<FileEntry> ListFiles() = 0;
virtual void Flush() = 0;
};
#endif // DRIVER_H

View File

@ -47,7 +47,10 @@ class SingleValueStorage {
// Calls CloseFile() on destruction unless bpm is null (presumably meaning no file is open any more -- confirm).
~SingleValueStorage() {
  if (bpm == nullptr) {
    return;
  }
  CloseFile();
}
/**
 * @brief Copy first_blank_element_pair_id into raw_mem, then ask bpm to flush all of its pages.
 *
 * Does nothing when bpm is null: the destructor treats a null bpm as "nothing to close", so there is presumably
 * nothing to flush either (TODO confirm), and dereferencing it would be undefined behavior.
 */
void Flush() {
  if (bpm == nullptr) return;
  // raw_mem presumably backs the persisted header of the file -- verify against the constructor/CloseFile.
  memcpy(raw_mem, &first_blank_element_pair_id, sizeof(size_t));
  bpm->FlushAllPages();
}
void get_info(int &tmp, int n) {
if (n > info_len) return;
n += 2;

View File

@ -129,3 +129,30 @@ TEST(Basic, T1) {
ApplyPatch("/tmp/1.dat", "/tmp/diff.dat", "/tmp/3.dat", false);
ApplyPatch("/tmp/2.dat", "/tmp/diff.dat", "/tmp/4.dat", true);
}
// Smoke test for snapshot creation: starting from a clean state, one session fills the map and creates "snap1";
// a second session reopens the same files, changes a few entries and creates "snap2" and "snap3".
// The test only checks that nothing throws or crashes; it makes no assertions on the produced files.
TEST(Basic, T2) {
  const char *const kIndexPath = "/tmp/T2/index.db";
  const char *const kDataPath = "/tmp/T2/data.db";
  const char *const kMetaPath = "/tmp/T2/meta.dat";
  remove(kIndexPath);
  remove(kDataPath);
  remove(kMetaPath);
  {
    // First session: the meta file does not exist yet, so SetMetaFile initializes the repository.
    DiskMap<int, int> map("index", kIndexPath, "data", kDataPath);
    SnapShotManager manager;
    sjtu::vector<DataDriverBase *> driver_list;
    driver_list.push_back(&map);
    manager.Connect(driver_list);
    manager.SetMetaFile(kMetaPath);
    for (int key = 0; key < 100000; ++key) {
      map.Put(key, key);
    }
    manager.CreateSnapShot("snap1");
  }
  {
    // Second session: reopen the same files and stack two more snapshots on top.
    DiskMap<int, int> map("index", kIndexPath, "data", kDataPath);
    SnapShotManager manager;
    sjtu::vector<DataDriverBase *> driver_list;
    driver_list.push_back(&map);
    manager.Connect(driver_list);
    manager.SetMetaFile(kMetaPath);
    for (int base = 0; base < 100; base += 10) {
      map.Put(base + 3, base);
    }
    manager.CreateSnapShot("snap2");
    manager.CreateSnapShot("snap3");
  }
}