From 01864d5a5ee48ce7f90b2c8d1f8f928a3534eca8 Mon Sep 17 00:00:00 2001
From: ZhuangYumin
Date: Mon, 4 Dec 2023 07:52:18 +0000
Subject: [PATCH] upd: first version of database

---
 backend/include/database.hpp   |  57 ++++++++++++
 backend/include/drivearray.hpp |  35 ++++----
 backend/include/key2index.hpp  | 149 +++++++++++++++++++++++++++++++++
 3 files changed, 227 insertions(+), 14 deletions(-)

diff --git a/backend/include/database.hpp b/backend/include/database.hpp
index e69de29..5e3ebbb 100644
--- a/backend/include/database.hpp
+++ b/backend/include/database.hpp
@@ -0,0 +1,57 @@
+#ifndef BPT_DATABASE_HPP
+#define BPT_DATABASE_HPP
+#include <string>
+#include <vector>
+
+#include "drivearray.hpp"
+#include "key2index.hpp"
+
+// Multimap from string keys to StorageType records. String2Index maps a key
+// to record slots and DriveArray holds the records themselves.
+template <class StorageType>
+class DriveMultiMap_string {
+ private:
+  String2Index Indexer;
+  DriveArray<StorageType> Storage;
+  bool is_open = false;
+
+ public:
+  DriveMultiMap_string() = default;
+  // Opens file_name + ".idx" (index) and file_name + ".dat" (records). The
+  // map only counts as open when the record file actually opened.
+  void OpenFile(const std::string &file_name) noexcept {
+    Indexer.OpenFile(file_name + ".idx");
+    Storage.OpenFile(file_name + ".dat");
+    is_open = Storage.IsOpen();
+  }
+  // Returns every record stored under `key`; empty when the map is not open.
+  std::vector<StorageType> Find(const std::string &key) noexcept {
+    std::vector<StorageType> ret;
+    if (!is_open) return ret;
+    const auto idxs = Indexer.Find(key);
+    ret.reserve(idxs.size());
+    for (const auto idx : idxs) {
+      ret.emplace_back();
+      Storage.read(ret.back(), idx);  // read straight into the result slot
+    }
+    return ret;  // no std::move: it would disable copy elision
+  }
+  // Removes the first record under `key` that compares equal to `value`.
+  void Delete(const std::string &key, const StorageType &value) noexcept {
+    if (!is_open) return;
+    for (const auto idx : Indexer.Find(key)) {
+      StorageType tmp;
+      Storage.read(tmp, idx);
+      if (!(tmp == value)) continue;
+      Storage.Delete(idx);
+      Indexer.Delete(key, idx);
+      return;
+    }
+  }
+  // Stores `value` and indexes its slot under `key`; no-op when not open.
+  void Insert(const std::string &key, StorageType &value) noexcept {
+    if (!is_open) return;
+    Indexer.Insert(key, Storage.write(value));
+  }
+};
+#endif  // BPT_DATABASE_HPP
\ No newline at end of file
diff --git a/backend/include/drivearray.hpp b/backend/include/drivearray.hpp
index f26d8d4..265a578 100644
--- a/backend/include/drivearray.hpp
+++ b/backend/include/drivearray.hpp @@ -13,7 +13,7 @@ #include #include -template +template class DriveArray { private: static const int kPageSize = 4096; @@ -26,10 +26,8 @@ class DriveArray { ((info_len + 2) * sizeof(int) + kPageSize - 1) / kPageSize * kPageSize; std::stack free_mem; int total_mem = 0; - const int kRefreshThreshold = 100; unsigned int forced_refresh = 0; - std::mutex mtx; - void reallocate(bool include_resync = false) { + void reallocate(bool include_resync = false) noexcept { size_t length_needed = raw_data_begin + (sizeofT * total_mem + kPageSize - 1) / kPageSize * kPageSize; @@ -43,7 +41,7 @@ class DriveArray { virtual_mem = mmap(nullptr, file_length, PROT_READ | PROT_WRITE, MAP_SHARED, file_descriptor, 0); } - void ForceRefresh() { + void ForceRefresh() noexcept { munmap(virtual_mem, file_length); virtual_mem = mmap(nullptr, file_length, PROT_READ | PROT_WRITE, MAP_SHARED, file_descriptor, 0); @@ -51,6 +49,7 @@ class DriveArray { public: DriveArray() = default; + inline bool IsOpen() const noexcept { return file_descriptor >= 0; } ~DriveArray() { reallocate(true); int stk_data_begin = @@ -65,11 +64,17 @@ class DriveArray { } munmap(virtual_mem, file_length); close(file_descriptor); + file_descriptor = -1; } bool operator=(const DriveArray &) = delete; - DriveArray(const std::string &file_name) : file_name(file_name) { + void OpenFile(const std::string &file_name) { if (file_name == "") return; + if (file_descriptor >= 0) { + munmap(virtual_mem, file_length); + close(file_descriptor); + file_descriptor = -1; + } file_descriptor = open(file_name.c_str(), O_CREAT | O_RDWR, S_IRUSR | S_IWUSR); struct stat file_state; @@ -93,8 +98,9 @@ class DriveArray { for (int i = 0; i < free_mem_cnt; i++) { free_mem.push(*(p++)); } - // madvise(virtual_mem + stk_data_begin, free_mem_cnt * sizeof(int), - // MADV_FREE); + } + DriveArray(const std::string &file_name) : file_name(file_name) { + OpenFile(file_name); } void initialise(std::string FN = "") { @@ 
-102,6 +108,7 @@ class DriveArray { if (file_descriptor >= 0) { munmap(virtual_mem, file_length); close(file_descriptor); + file_descriptor = -1; } file_descriptor = open(file_name.c_str(), O_CREAT | O_RDWR, S_IRUSR | S_IWUSR); @@ -115,7 +122,7 @@ class DriveArray { for (int i = 0; i < info_len; i++) *((int *)(virtual_mem) + i) = 0; } - void get_info(int &tmp, int n) { + void get_info(int &tmp, int n) noexcept { if (n > info_len) return; tmp = *((int *)(virtual_mem) + n - 1); if (++forced_refresh >= kRefreshThreshold) { @@ -124,7 +131,7 @@ class DriveArray { } } - void write_info(int tmp, int n) { + void write_info(int tmp, int n) noexcept { if (n > info_len) return; *((int *)(virtual_mem) + n - 1) = tmp; if (++forced_refresh >= kRefreshThreshold) { @@ -133,7 +140,7 @@ class DriveArray { } } - int write(T &t) { + int write(T &t) noexcept { int index = -1; if (!free_mem.empty()) { index = free_mem.top(); @@ -144,7 +151,7 @@ class DriveArray { return index; } - void update(T &t, const int index) { + void update(T &t, const int index) noexcept { reallocate(); void *data_begin = virtual_mem + raw_data_begin + sizeofT * (index - 1); std::memmove(data_begin, &t, sizeofT); @@ -155,7 +162,7 @@ class DriveArray { } } - void read(T &t, const int index) { + void read(T &t, const int index) noexcept { reallocate(); void *data_begin = virtual_mem + raw_data_begin + sizeofT * (index - 1); std::memmove(&t, data_begin, sizeofT); @@ -166,7 +173,7 @@ class DriveArray { } } - void Delete(int index) { free_mem.push(index); } + void Delete(int index) noexcept { free_mem.push(index); } }; #endif // BPT_DriveArray_HPP \ No newline at end of file diff --git a/backend/include/key2index.hpp b/backend/include/key2index.hpp index e69de29..49c39e9 100644 --- a/backend/include/key2index.hpp +++ b/backend/include/key2index.hpp @@ -0,0 +1,149 @@ +#ifndef BPT_KEY2INDEX_HPP +#define BPT_KEY2INDEX_HPP +#include +#include +#include +#include + +#include "drivearray.hpp" +class String2Index { + 
private: + static const int kBucketSize = 262142; + static const int kPageSize = 4096; + + struct Node { + char str[66]; + int val; + Node() = default; + Node(const std::string &_str, int _val) : val(_val) { + assert(_str.length() <= 64); + strcpy(str, _str.c_str()); + } + }; + static const int kNodesPerBlock = + (kPageSize - 2 * sizeof(int)) / sizeof(Node); + + struct Block { + int tot, nxt_idx; + Node data[kNodesPerBlock]; + char padding[kPageSize - 2 * sizeof(int) - sizeof(Node) * (kNodesPerBlock)]; + Block() : tot(0), nxt_idx(0) {} + Block(int _tot, int _nxt_idx) : tot(_tot), nxt_idx(_nxt_idx) {} + }; + static_assert(kNodesPerBlock >= 1, "kNodesPerBlock error"); + static_assert(sizeof(Block) == kPageSize, "Block Size error"); + + DriveArray mem; + int *hash_table = nullptr; + std::string file_name; + + inline size_t Hash(std::string str) noexcept { + const static std::string salt1 = "mL;]-=eT"; + const static std::string salt2 = "9B= 1); + } + mem.read(*blk_ptr, idx); + if (blk_ptr->tot == kNodesPerBlock) { + Block __New_Head_Block(0, idx); + idx = mem.write(__New_Head_Block); + hash_table[hash_val % kBucketSize] = idx; + mem.read(*blk_ptr, idx); + } + blk_ptr->data[blk_ptr->tot++] = Node(str, val); + mem.update(*blk_ptr, idx); + delete blk_ptr; + } + void Delete(const std::string &str, int val) noexcept { + size_t hash_val = Hash(str); + int idx = hash_table[hash_val % kBucketSize]; + Block *blk_ptr = new Block; + while (idx != 0) { + mem.read(*blk_ptr, idx); + for (int i = 0; i < blk_ptr->tot; ++i) { + if (blk_ptr->data[i].str == str && blk_ptr->data[i].val == val) { + int headidx = hash_table[hash_val % kBucketSize]; + if (headidx == idx) { + blk_ptr->data[i] = blk_ptr->data[--blk_ptr->tot]; + mem.update(*blk_ptr, idx); + } else { + Block *head_blk_ptr = new Block; + mem.read(*head_blk_ptr, headidx); + blk_ptr->data[i] = head_blk_ptr->data[--head_blk_ptr->tot]; + if (head_blk_ptr->tot == 0) { + hash_table[hash_val % kBucketSize] = head_blk_ptr->nxt_idx; + 
mem.Delete(headidx); + } else + mem.update(*head_blk_ptr, headidx); + mem.update(*blk_ptr, idx); + delete head_blk_ptr; + } + delete blk_ptr; + return; + } + } + idx = blk_ptr->nxt_idx; + } + delete blk_ptr; + } + std::vector Find(const std::string &str) noexcept { + std::vector ret; + size_t hash_val = Hash(str); + int idx = hash_table[hash_val % kBucketSize]; + Block *blk_ptr = new Block; + while (idx != 0) { + mem.read(*blk_ptr, idx); + for (int i = 0; i < blk_ptr->tot; ++i) { + if (blk_ptr->data[i].str == str) { + ret.push_back(blk_ptr->data[i].val); + } + } + idx = blk_ptr->nxt_idx; + } + delete blk_ptr; + return std::move(ret); + } +}; +#endif // BPT_KEY2INDEX_HPP \ No newline at end of file