From 0dfaa80f87f7135dd9c393b9070969818999fb73 Mon Sep 17 00:00:00 2001 From: ZhuangYumin Date: Thu, 7 Dec 2023 14:57:43 +0000 Subject: [PATCH] docs: further optimize key2index --- backend/include/key2index.hpp | 97 +++++++++++------------------------ 1 file changed, 30 insertions(+), 67 deletions(-) diff --git a/backend/include/key2index.hpp b/backend/include/key2index.hpp index 322a66f..55de193 100644 --- a/backend/include/key2index.hpp +++ b/backend/include/key2index.hpp @@ -24,28 +24,15 @@ class String2Index { struct Node { size_t main_hash, sub_hash; - int val; + int val, nxt_idx = 0; Node() = default; Node(std::string str, int _val) : main_hash(Hash(str)), sub_hash(Hash(sub_salt1 + str + sub_salt2)), val(_val) {} }; - static const int kNodesPerBlock = - (kPageSize - 3 * sizeof(int)) / sizeof(Node); - struct Block { - int tot, nxt_idx; - Node data[kNodesPerBlock]; - // char padding[kPageSize - 3 * sizeof(int) - sizeof(Node) * - // (kNodesPerBlock)]; - Block() : tot(0), nxt_idx(0) {} - Block(int _tot, int _nxt_idx) : tot(_tot), nxt_idx(_nxt_idx) {} - }; - static_assert(kNodesPerBlock >= 1, "kNodesPerBlock error"); - static_assert(sizeof(Block) <= kPageSize - 4, "Block Size error"); - - DriveArray mem; + DriveArray mem; int *hash_table = nullptr; std::string file_name; @@ -85,78 +72,54 @@ class String2Index { void Insert(const std::string &str, int val) noexcept { size_t hash_val = Hash(str); int idx = hash_table[hash_val % kBucketSize]; - Block *blk_ptr = new Block; + Node nd(str, val); if (idx == 0) { - Block __Init_Block; - idx = mem.write(__Init_Block); + idx = mem.write(nd); hash_table[hash_val % kBucketSize] = idx; - assert(idx >= 1); - } - mem.read(*blk_ptr, idx); - if (blk_ptr->tot == kNodesPerBlock) { - Block __New_Head_Block(0, idx); - idx = mem.write(__New_Head_Block); + } else { + nd.nxt_idx = idx; + idx = mem.write(nd); hash_table[hash_val % kBucketSize] = idx; - mem.read(*blk_ptr, idx); } - blk_ptr->data[blk_ptr->tot++] = Node(str, val); - mem.update(*blk_ptr, idx); - delete blk_ptr; } void Delete(const std::string &str, int val) noexcept { - size_t hash_val = Hash(str); - int idx = hash_table[hash_val % kBucketSize]; - Block *blk_ptr = new Block; size_t str_main_hash = Hash(str), str_sub_hash = Hash(sub_salt1 + str + sub_salt2); + int idx = hash_table[str_main_hash % kBucketSize]; + Node nd, last_nd; + int last_idx = -1; while (idx != 0) { - mem.read(*blk_ptr, idx); - for (int i = 0; i < blk_ptr->tot; ++i) { - if (blk_ptr->data[i].main_hash == str_main_hash && - blk_ptr->data[i].sub_hash == str_sub_hash && - blk_ptr->data[i].val == val) { - int headidx = hash_table[hash_val % kBucketSize]; - if (headidx == idx) { - blk_ptr->data[i] = blk_ptr->data[--blk_ptr->tot]; - mem.update(*blk_ptr, idx); - } else { - Block *head_blk_ptr = new Block; - mem.read(*head_blk_ptr, headidx); - blk_ptr->data[i] = head_blk_ptr->data[--head_blk_ptr->tot]; - if (head_blk_ptr->tot == 0) { - hash_table[hash_val % kBucketSize] = head_blk_ptr->nxt_idx; - mem.Delete(headidx); - } else - mem.update(*head_blk_ptr, headidx); - mem.update(*blk_ptr, idx); - delete head_blk_ptr; - } - delete blk_ptr; - return; + mem.read(nd, idx); + if (nd.main_hash == str_main_hash && nd.sub_hash == str_sub_hash && + nd.val == val) { + if (last_idx == -1) { + hash_table[str_main_hash % kBucketSize] = nd.nxt_idx; + mem.Delete(idx); + } else { + last_nd.nxt_idx = nd.nxt_idx; + mem.update(last_nd, last_idx); + mem.Delete(idx); } + return; } - idx = blk_ptr->nxt_idx; + last_idx = idx; + last_nd = nd; + idx = nd.nxt_idx; } - delete blk_ptr; } std::vector Find(const std::string &str) noexcept { std::vector ret; - size_t hash_val = Hash(str); - int idx = hash_table[hash_val % kBucketSize]; - Block *blk_ptr = new Block; size_t str_main_hash = Hash(str), str_sub_hash = Hash(sub_salt1 + str + sub_salt2); + int idx = hash_table[str_main_hash % kBucketSize]; + Node nd; while (idx != 0) { - mem.read(*blk_ptr, idx); - for (int i = 0; i < blk_ptr->tot; ++i) { - if (blk_ptr->data[i].main_hash == str_main_hash && - blk_ptr->data[i].sub_hash == str_sub_hash) { - ret.push_back(blk_ptr->data[i].val); - } + mem.read(nd, idx); + if (nd.main_hash == str_main_hash && nd.sub_hash == str_sub_hash) { + ret.push_back(nd.val); } - idx = blk_ptr->nxt_idx; + idx = nd.nxt_idx; } - delete blk_ptr; return std::move(ret); } };