docs: further optimize key2index

This commit is contained in:
2023-12-07 14:57:43 +00:00
parent 9c5ab581d0
commit 0dfaa80f87

View File

@ -24,28 +24,15 @@ class String2Index {
struct Node { struct Node {
size_t main_hash, sub_hash; size_t main_hash, sub_hash;
int val; int val, nxt_idx = 0;
Node() = default; Node() = default;
Node(std::string str, int _val) Node(std::string str, int _val)
: main_hash(Hash(str)), : main_hash(Hash(str)),
sub_hash(Hash(sub_salt1 + str + sub_salt2)), sub_hash(Hash(sub_salt1 + str + sub_salt2)),
val(_val) {} val(_val) {}
}; };
static const int kNodesPerBlock =
(kPageSize - 3 * sizeof(int)) / sizeof(Node);
struct Block { DriveArray<Node, kBucketSize, 100> mem;
int tot, nxt_idx;
Node data[kNodesPerBlock];
// char padding[kPageSize - 3 * sizeof(int) - sizeof(Node) *
// (kNodesPerBlock)];
Block() : tot(0), nxt_idx(0) {}
Block(int _tot, int _nxt_idx) : tot(_tot), nxt_idx(_nxt_idx) {}
};
static_assert(kNodesPerBlock >= 1, "kNodesPerBlock error");
static_assert(sizeof(Block) <= kPageSize - 4, "Block Size error");
DriveArray<Block, kBucketSize, 100> mem;
int *hash_table = nullptr; int *hash_table = nullptr;
std::string file_name; std::string file_name;
@ -85,78 +72,54 @@ class String2Index {
void Insert(const std::string &str, int val) noexcept { void Insert(const std::string &str, int val) noexcept {
size_t hash_val = Hash(str); size_t hash_val = Hash(str);
int idx = hash_table[hash_val % kBucketSize]; int idx = hash_table[hash_val % kBucketSize];
Block *blk_ptr = new Block; Node nd(str, val);
if (idx == 0) { if (idx == 0) {
Block __Init_Block; idx = mem.write(nd);
idx = mem.write(__Init_Block);
hash_table[hash_val % kBucketSize] = idx; hash_table[hash_val % kBucketSize] = idx;
assert(idx >= 1); } else {
} nd.nxt_idx = idx;
mem.read(*blk_ptr, idx); idx = mem.write(nd);
if (blk_ptr->tot == kNodesPerBlock) {
Block __New_Head_Block(0, idx);
idx = mem.write(__New_Head_Block);
hash_table[hash_val % kBucketSize] = idx; hash_table[hash_val % kBucketSize] = idx;
mem.read(*blk_ptr, idx);
} }
blk_ptr->data[blk_ptr->tot++] = Node(str, val);
mem.update(*blk_ptr, idx);
delete blk_ptr;
} }
void Delete(const std::string &str, int val) noexcept { void Delete(const std::string &str, int val) noexcept {
size_t hash_val = Hash(str);
int idx = hash_table[hash_val % kBucketSize];
Block *blk_ptr = new Block;
size_t str_main_hash = Hash(str), size_t str_main_hash = Hash(str),
str_sub_hash = Hash(sub_salt1 + str + sub_salt2); str_sub_hash = Hash(sub_salt1 + str + sub_salt2);
int idx = hash_table[str_main_hash % kBucketSize];
Node nd, last_nd;
int last_idx = -1;
while (idx != 0) { while (idx != 0) {
mem.read(*blk_ptr, idx); mem.read(nd, idx);
for (int i = 0; i < blk_ptr->tot; ++i) { if (nd.main_hash == str_main_hash && nd.sub_hash == str_sub_hash &&
if (blk_ptr->data[i].main_hash == str_main_hash && nd.val == val) {
blk_ptr->data[i].sub_hash == str_sub_hash && if (last_idx == -1) {
blk_ptr->data[i].val == val) { hash_table[str_main_hash % kBucketSize] = nd.nxt_idx;
int headidx = hash_table[hash_val % kBucketSize]; mem.Delete(idx);
if (headidx == idx) { } else {
blk_ptr->data[i] = blk_ptr->data[--blk_ptr->tot]; last_nd.nxt_idx = nd.nxt_idx;
mem.update(*blk_ptr, idx); mem.update(last_nd, last_idx);
} else { mem.Delete(idx);
Block *head_blk_ptr = new Block;
mem.read(*head_blk_ptr, headidx);
blk_ptr->data[i] = head_blk_ptr->data[--head_blk_ptr->tot];
if (head_blk_ptr->tot == 0) {
hash_table[hash_val % kBucketSize] = head_blk_ptr->nxt_idx;
mem.Delete(headidx);
} else
mem.update(*head_blk_ptr, headidx);
mem.update(*blk_ptr, idx);
delete head_blk_ptr;
}
delete blk_ptr;
return;
} }
return;
} }
idx = blk_ptr->nxt_idx; last_idx = idx;
last_nd = nd;
idx = nd.nxt_idx;
} }
delete blk_ptr;
} }
std::vector<int> Find(const std::string &str) noexcept { std::vector<int> Find(const std::string &str) noexcept {
std::vector<int> ret; std::vector<int> ret;
size_t hash_val = Hash(str);
int idx = hash_table[hash_val % kBucketSize];
Block *blk_ptr = new Block;
size_t str_main_hash = Hash(str), size_t str_main_hash = Hash(str),
str_sub_hash = Hash(sub_salt1 + str + sub_salt2); str_sub_hash = Hash(sub_salt1 + str + sub_salt2);
int idx = hash_table[str_main_hash % kBucketSize];
Node nd;
while (idx != 0) { while (idx != 0) {
mem.read(*blk_ptr, idx); mem.read(nd, idx);
for (int i = 0; i < blk_ptr->tot; ++i) { if (nd.main_hash == str_main_hash && nd.sub_hash == str_sub_hash) {
if (blk_ptr->data[i].main_hash == str_main_hash && ret.push_back(nd.val);
blk_ptr->data[i].sub_hash == str_sub_hash) {
ret.push_back(blk_ptr->data[i].val);
}
} }
idx = blk_ptr->nxt_idx; idx = nd.nxt_idx;
} }
delete blk_ptr;
return std::move(ret); return std::move(ret);
} }
}; };