docs: further optimize key2index
This commit is contained in:
@ -24,28 +24,15 @@ class String2Index {
|
|||||||
|
|
||||||
struct Node {
|
struct Node {
|
||||||
size_t main_hash, sub_hash;
|
size_t main_hash, sub_hash;
|
||||||
int val;
|
int val, nxt_idx = 0;
|
||||||
Node() = default;
|
Node() = default;
|
||||||
Node(std::string str, int _val)
|
Node(std::string str, int _val)
|
||||||
: main_hash(Hash(str)),
|
: main_hash(Hash(str)),
|
||||||
sub_hash(Hash(sub_salt1 + str + sub_salt2)),
|
sub_hash(Hash(sub_salt1 + str + sub_salt2)),
|
||||||
val(_val) {}
|
val(_val) {}
|
||||||
};
|
};
|
||||||
static const int kNodesPerBlock =
|
|
||||||
(kPageSize - 3 * sizeof(int)) / sizeof(Node);
|
|
||||||
|
|
||||||
struct Block {
|
DriveArray<Node, kBucketSize, 100> mem;
|
||||||
int tot, nxt_idx;
|
|
||||||
Node data[kNodesPerBlock];
|
|
||||||
// char padding[kPageSize - 3 * sizeof(int) - sizeof(Node) *
|
|
||||||
// (kNodesPerBlock)];
|
|
||||||
Block() : tot(0), nxt_idx(0) {}
|
|
||||||
Block(int _tot, int _nxt_idx) : tot(_tot), nxt_idx(_nxt_idx) {}
|
|
||||||
};
|
|
||||||
static_assert(kNodesPerBlock >= 1, "kNodesPerBlock error");
|
|
||||||
static_assert(sizeof(Block) <= kPageSize - 4, "Block Size error");
|
|
||||||
|
|
||||||
DriveArray<Block, kBucketSize, 100> mem;
|
|
||||||
int *hash_table = nullptr;
|
int *hash_table = nullptr;
|
||||||
std::string file_name;
|
std::string file_name;
|
||||||
|
|
||||||
@ -85,78 +72,54 @@ class String2Index {
|
|||||||
void Insert(const std::string &str, int val) noexcept {
|
void Insert(const std::string &str, int val) noexcept {
|
||||||
size_t hash_val = Hash(str);
|
size_t hash_val = Hash(str);
|
||||||
int idx = hash_table[hash_val % kBucketSize];
|
int idx = hash_table[hash_val % kBucketSize];
|
||||||
Block *blk_ptr = new Block;
|
Node nd(str, val);
|
||||||
if (idx == 0) {
|
if (idx == 0) {
|
||||||
Block __Init_Block;
|
idx = mem.write(nd);
|
||||||
idx = mem.write(__Init_Block);
|
|
||||||
hash_table[hash_val % kBucketSize] = idx;
|
hash_table[hash_val % kBucketSize] = idx;
|
||||||
assert(idx >= 1);
|
} else {
|
||||||
}
|
nd.nxt_idx = idx;
|
||||||
mem.read(*blk_ptr, idx);
|
idx = mem.write(nd);
|
||||||
if (blk_ptr->tot == kNodesPerBlock) {
|
|
||||||
Block __New_Head_Block(0, idx);
|
|
||||||
idx = mem.write(__New_Head_Block);
|
|
||||||
hash_table[hash_val % kBucketSize] = idx;
|
hash_table[hash_val % kBucketSize] = idx;
|
||||||
mem.read(*blk_ptr, idx);
|
|
||||||
}
|
}
|
||||||
blk_ptr->data[blk_ptr->tot++] = Node(str, val);
|
|
||||||
mem.update(*blk_ptr, idx);
|
|
||||||
delete blk_ptr;
|
|
||||||
}
|
}
|
||||||
void Delete(const std::string &str, int val) noexcept {
|
void Delete(const std::string &str, int val) noexcept {
|
||||||
size_t hash_val = Hash(str);
|
|
||||||
int idx = hash_table[hash_val % kBucketSize];
|
|
||||||
Block *blk_ptr = new Block;
|
|
||||||
size_t str_main_hash = Hash(str),
|
size_t str_main_hash = Hash(str),
|
||||||
str_sub_hash = Hash(sub_salt1 + str + sub_salt2);
|
str_sub_hash = Hash(sub_salt1 + str + sub_salt2);
|
||||||
|
int idx = hash_table[str_main_hash % kBucketSize];
|
||||||
|
Node nd, last_nd;
|
||||||
|
int last_idx = -1;
|
||||||
while (idx != 0) {
|
while (idx != 0) {
|
||||||
mem.read(*blk_ptr, idx);
|
mem.read(nd, idx);
|
||||||
for (int i = 0; i < blk_ptr->tot; ++i) {
|
if (nd.main_hash == str_main_hash && nd.sub_hash == str_sub_hash &&
|
||||||
if (blk_ptr->data[i].main_hash == str_main_hash &&
|
nd.val == val) {
|
||||||
blk_ptr->data[i].sub_hash == str_sub_hash &&
|
if (last_idx == -1) {
|
||||||
blk_ptr->data[i].val == val) {
|
hash_table[str_main_hash % kBucketSize] = nd.nxt_idx;
|
||||||
int headidx = hash_table[hash_val % kBucketSize];
|
mem.Delete(idx);
|
||||||
if (headidx == idx) {
|
} else {
|
||||||
blk_ptr->data[i] = blk_ptr->data[--blk_ptr->tot];
|
last_nd.nxt_idx = nd.nxt_idx;
|
||||||
mem.update(*blk_ptr, idx);
|
mem.update(last_nd, last_idx);
|
||||||
} else {
|
mem.Delete(idx);
|
||||||
Block *head_blk_ptr = new Block;
|
|
||||||
mem.read(*head_blk_ptr, headidx);
|
|
||||||
blk_ptr->data[i] = head_blk_ptr->data[--head_blk_ptr->tot];
|
|
||||||
if (head_blk_ptr->tot == 0) {
|
|
||||||
hash_table[hash_val % kBucketSize] = head_blk_ptr->nxt_idx;
|
|
||||||
mem.Delete(headidx);
|
|
||||||
} else
|
|
||||||
mem.update(*head_blk_ptr, headidx);
|
|
||||||
mem.update(*blk_ptr, idx);
|
|
||||||
delete head_blk_ptr;
|
|
||||||
}
|
|
||||||
delete blk_ptr;
|
|
||||||
return;
|
|
||||||
}
|
}
|
||||||
|
return;
|
||||||
}
|
}
|
||||||
idx = blk_ptr->nxt_idx;
|
last_idx = idx;
|
||||||
|
last_nd = nd;
|
||||||
|
idx = nd.nxt_idx;
|
||||||
}
|
}
|
||||||
delete blk_ptr;
|
|
||||||
}
|
}
|
||||||
std::vector<int> Find(const std::string &str) noexcept {
|
std::vector<int> Find(const std::string &str) noexcept {
|
||||||
std::vector<int> ret;
|
std::vector<int> ret;
|
||||||
size_t hash_val = Hash(str);
|
|
||||||
int idx = hash_table[hash_val % kBucketSize];
|
|
||||||
Block *blk_ptr = new Block;
|
|
||||||
size_t str_main_hash = Hash(str),
|
size_t str_main_hash = Hash(str),
|
||||||
str_sub_hash = Hash(sub_salt1 + str + sub_salt2);
|
str_sub_hash = Hash(sub_salt1 + str + sub_salt2);
|
||||||
|
int idx = hash_table[str_main_hash % kBucketSize];
|
||||||
|
Node nd;
|
||||||
while (idx != 0) {
|
while (idx != 0) {
|
||||||
mem.read(*blk_ptr, idx);
|
mem.read(nd, idx);
|
||||||
for (int i = 0; i < blk_ptr->tot; ++i) {
|
if (nd.main_hash == str_main_hash && nd.sub_hash == str_sub_hash) {
|
||||||
if (blk_ptr->data[i].main_hash == str_main_hash &&
|
ret.push_back(nd.val);
|
||||||
blk_ptr->data[i].sub_hash == str_sub_hash) {
|
|
||||||
ret.push_back(blk_ptr->data[i].val);
|
|
||||||
}
|
|
||||||
}
|
}
|
||||||
idx = blk_ptr->nxt_idx;
|
idx = nd.nxt_idx;
|
||||||
}
|
}
|
||||||
delete blk_ptr;
|
|
||||||
return std::move(ret);
|
return std::move(ret);
|
||||||
}
|
}
|
||||||
};
|
};
|
||||||
|
Reference in New Issue
Block a user