finish writing remove, ready to deeeeeeeeeeeeeeeeeeeeeeeeeebug

This commit is contained in:
2024-04-29 12:19:46 +00:00
parent 1632870656
commit bf76de6590
5 changed files with 465 additions and 36 deletions

View File

@ -32,7 +32,7 @@ else()
add_definitions(-DGIT_COMMIT_HASH="[developing]") add_definitions(-DGIT_COMMIT_HASH="[developing]")
endif() endif()
# 设置一个布尔类型的选项,用于控制是否启用高级功能,如程序日志、并发、快照等 # 设置一个布尔类型的选项,用于控制是否启用高级功能,如日志(业务日志、程序日志、容错校验日志)、并发、快照等
option(ENABLE_ADVANCED_FEATURE "Enable advanced features" OFF) option(ENABLE_ADVANCED_FEATURE "Enable advanced features" OFF)
option(OJ_TEST_BPT "Enable OJ test for B+ Tree" ON) option(OJ_TEST_BPT "Enable OJ test for B+ Tree" ON)
option(OJ_TEST_BACKEND "Enable OJ test for backend" OFF) option(OJ_TEST_BACKEND "Enable OJ test for backend" OFF)

View File

@ -64,7 +64,7 @@ class BPlusTreeIndexer {
res.is_end = false; res.is_end = false;
return res; return res;
} }
void InsertFixUpLookPartA(PositionSignType &pos, BasicPageGuard &parent_page_guard, BasicPageGuard &new_page_guard, void InsertFixUpLoopPartA(PositionSignType &pos, BasicPageGuard &parent_page_guard, BasicPageGuard &new_page_guard,
BasicPageGuard &page_guard, default_numeric_index_t new_page_id) { BasicPageGuard &page_guard, default_numeric_index_t new_page_id) {
pos.path[pos.path.size() - 2].second++; pos.path[pos.path.size() - 2].second++;
// now check we are able to "insert" (new_page_guard.template // now check we are able to "insert" (new_page_guard.template
@ -125,9 +125,9 @@ class BPlusTreeIndexer {
new_page_id); new_page_id);
KeyType key_to_update_backup = KeyType key_to_update_backup =
page_guard.template As<PageType>()->data.p_data[page_guard.template As<PageType>()->data.key_count - 1].first; page_guard.template As<PageType>()->data.p_data[page_guard.template As<PageType>()->data.key_count - 1].first;
InsertFixUpLookPartB(pos, parent_page_guard, new_entry_backup, key_to_update_backup); InsertFixUpLoopPartB(pos, parent_page_guard, new_entry_backup, key_to_update_backup);
} }
void InsertFixUpLookPartB(PositionSignType &pos, BasicPageGuard &page_guard, const key_index_pair_t &new_entry_backup, void InsertFixUpLoopPartB(PositionSignType &pos, BasicPageGuard &page_guard, const key_index_pair_t &new_entry_backup,
const KeyType &key_to_update_backup) { const KeyType &key_to_update_backup) {
default_numeric_index_t new_page_id; default_numeric_index_t new_page_id;
auto new_page_guard = std::move(bpm->NewPageGuarded(&new_page_id)); auto new_page_guard = std::move(bpm->NewPageGuarded(&new_page_id));
@ -195,7 +195,7 @@ class BPlusTreeIndexer {
return; return;
} }
auto &parent_page_guard = pos.path[pos.path.size() - 2].first; auto &parent_page_guard = pos.path[pos.path.size() - 2].first;
InsertFixUpLookPartA(pos, parent_page_guard, new_page_guard, page_guard, new_page_id); InsertFixUpLoopPartA(pos, parent_page_guard, new_page_guard, page_guard, new_page_id);
} }
void InsertEntryAt(PositionSignType &pos, const KeyType &key, b_plus_tree_value_index_t value, void InsertEntryAt(PositionSignType &pos, const KeyType &key, b_plus_tree_value_index_t value,
bool is_fixing_up_recursive = false) { bool is_fixing_up_recursive = false) {
@ -207,7 +207,6 @@ class BPlusTreeIndexer {
new_page_guard.AsMut<PageType>()->data.key_count = 1; new_page_guard.AsMut<PageType>()->data.key_count = 1;
new_page_guard.AsMut<PageType>()->data.p_data[0] = std::make_pair(key, value); new_page_guard.AsMut<PageType>()->data.p_data[0] = std::make_pair(key, value);
new_page_guard.AsMut<PageType>()->data.p_n = 0; new_page_guard.AsMut<PageType>()->data.p_n = 0;
if (!is_fixing_up_recursive) ++siz;
return; return;
} }
auto &page_guard = pos.path.back().first; auto &page_guard = pos.path.back().first;
@ -220,7 +219,6 @@ class BPlusTreeIndexer {
page_guard.template AsMut<PageType>()->data.key_count++; page_guard.template AsMut<PageType>()->data.key_count++;
// fprintf(stderr, "page_guard.template As<PageType>()->data.key_count = %d\n", // fprintf(stderr, "page_guard.template As<PageType>()->data.key_count = %d\n",
// (int)page_guard.template As<PageType>()->data.key_count); // (int)page_guard.template As<PageType>()->data.key_count);
if (!is_fixing_up_recursive) ++siz;
return; return;
} }
// In our case, the tree is not too high, so we do not consider borrowing from siblings, we just split the page. // In our case, the tree is not too high, so we do not consider borrowing from siblings, we just split the page.
@ -233,7 +231,7 @@ class BPlusTreeIndexer {
if (!is_fixing_up_recursive) if (!is_fixing_up_recursive)
new_page_guard.AsMut<PageType>()->data.page_status = PageStatusType::LEAF; new_page_guard.AsMut<PageType>()->data.page_status = PageStatusType::LEAF;
else else
new_page_guard.AsMut<PageType>()->data.page_status = 0; // PageStatusType::INTERNAL; new_page_guard.AsMut<PageType>()->data.page_status = PageStatusType::INTERNAL;
new_page_guard.AsMut<PageType>()->data.key_count = _ActualDataType::kMinNumberOfKeysForLeaf; new_page_guard.AsMut<PageType>()->data.key_count = _ActualDataType::kMinNumberOfKeysForLeaf;
page_guard.template AsMut<PageType>()->data.key_count -= _ActualDataType::kMinNumberOfKeysForLeaf; page_guard.template AsMut<PageType>()->data.key_count -= _ActualDataType::kMinNumberOfKeysForLeaf;
if (!is_fixing_up_recursive) if (!is_fixing_up_recursive)
@ -278,7 +276,6 @@ class BPlusTreeIndexer {
page_guard.template As<PageType>()->data.p_data[page_guard.template As<PageType>()->data.key_count - 1].first, page_guard.template As<PageType>()->data.p_data[page_guard.template As<PageType>()->data.key_count - 1].first,
page_guard.PageId()); page_guard.PageId());
new_root_page_guard.AsMut<PageType>()->data.p_data[1] = std::make_pair(KeyType(), new_page_id); new_root_page_guard.AsMut<PageType>()->data.p_data[1] = std::make_pair(KeyType(), new_page_id);
if (!is_fixing_up_recursive) ++siz;
// fprintf(stderr, "new_page_guard.AsMut<PageType>()->data.key_count = %d\n", // fprintf(stderr, "new_page_guard.AsMut<PageType>()->data.key_count = %d\n",
// (int)new_page_guard.AsMut<PageType>()->data.key_count); // (int)new_page_guard.AsMut<PageType>()->data.key_count);
return; return;
@ -286,16 +283,15 @@ class BPlusTreeIndexer {
assert(pos.path.size() >= 2); assert(pos.path.size() >= 2);
auto &parent_page_guard = pos.path[pos.path.size() - 2].first; auto &parent_page_guard = pos.path[pos.path.size() - 2].first;
bool is_in_right_skew_path = false; bool is_in_right_skew_path = false;
if (pos.path[pos.path.size() - 2].second == parent_page_guard.template As<PageType>()->data.key_count) { // if (pos.path[pos.path.size() - 2].second == parent_page_guard.template As<PageType>()->data.key_count) {
is_in_right_skew_path = true; // is_in_right_skew_path = true;
} // }
if (pos.path.size() == 2 || pos.path[pos.path.size() - 3].second == if (pos.path.size() == 2 || pos.path[pos.path.size() - 3].second ==
pos.path[pos.path.size() - 3].first.template As<PageType>()->data.key_count) { pos.path[pos.path.size() - 3].first.template As<PageType>()->data.key_count) {
is_in_right_skew_path = true; is_in_right_skew_path = true;
} }
if (is_in_right_skew_path) { if (is_in_right_skew_path) {
InsertFixUpLookPartA(pos, parent_page_guard, new_page_guard, page_guard, new_page_id); InsertFixUpLoopPartA(pos, parent_page_guard, new_page_guard, page_guard, new_page_id);
if (!is_fixing_up_recursive) ++siz;
return; return;
} }
parent_page_guard.template AsMut<PageType>()->data.p_data[pos.path[pos.path.size() - 2].second].first = parent_page_guard.template AsMut<PageType>()->data.p_data[pos.path[pos.path.size() - 2].second].first =
@ -308,31 +304,255 @@ class BPlusTreeIndexer {
->data.p_data[new_page_guard.template As<PageType>()->data.key_count - 1] ->data.p_data[new_page_guard.template As<PageType>()->data.key_count - 1]
.first, .first,
new_page_id, true); new_page_id, true);
if (!is_fixing_up_recursive) ++siz;
return;
}
void RemoveEntryAt(PositionSignType &pos) {
if (siz == 1) {
// special case for the last entry
bpm->DeletePage(root_page_id);
root_page_id = 0;
--siz;
return; return;
} }
void RemoveEntryInRightSkewPath(PositionSignType &pos) {
auto &page_guard = pos.path.back().first; auto &page_guard = pos.path.back().first;
bool has_enough_keys = false;
if (page_guard.template As<PageType>()->data.key_count > _ActualDataType::kMinNumberOfKeysForLeaf || if (page_guard.template As<PageType>()->data.key_count > _ActualDataType::kMinNumberOfKeysForLeaf ||
(page_guard.template As<PageType>()->data.page_status & PageStatusType::ROOT) != 0) { (page_guard.template As<PageType>()->data.page_status & PageStatusType::ROOT) != 0) {
// case 1: the page has enough keys // case 1: the page has enough keys
has_enough_keys = true;
}
if (pos.path.back().second == page_guard.template As<PageType>()->data.key_count) {
// The "entry" to remove is just a past-the-end pointer
page_guard.template AsMut<PageType>()->data.key_count--;
return;
} else {
// The "entry" to remove is a key-val pair
memmove( memmove(
page_guard.template AsMut<PageType>()->data.p_data + pos.path.back().second, page_guard.template AsMut<PageType>()->data.p_data + pos.path.back().second,
page_guard.template As<PageType>()->data.p_data + pos.path.back().second + 1, page_guard.template As<PageType>()->data.p_data + pos.path.back().second + 1,
(page_guard.template As<PageType>()->data.key_count - pos.path.back().second - 1) * sizeof(key_index_pair_t)); (page_guard.template As<PageType>()->data.key_count - pos.path.back().second - 1) * sizeof(key_index_pair_t));
page_guard.template AsMut<PageType>()->data.key_count--; page_guard.template AsMut<PageType>()->data.key_count--;
--siz;
return; return;
} }
throw std::runtime_error("Not implemented yet: RemoveEntryAt"); if (has_enough_keys) {
// TODO if (page_guard.template As<PageType>()->data.page_status & PageStatusType::ROOT &&
page_guard.template As<PageType>()->data.key_count) {
// special case for the root page
root_page_id = page_guard.template As<PageType>()->data.p_data[0].second;
page_id_t page_to_delete = page_guard.PageId();
pos.path.clear(); // all page_guards are invalid now
bpm->DeletePage(page_to_delete);
return;
}
return;
}
assert(pos.path.size() >= 2);
assert(pos.path[pos.path.size() - 2].second > 0);
page_id_t possible_prev_page_id = 0;
auto &parent_page_guard = pos.path[pos.path.size() - 2].first;
possible_prev_page_id =
parent_page_guard.template As<PageType>()->data.p_data[pos.path[pos.path.size() - 2].second - 1].second;
BasicPageGuard prev_page_guard = std::move(bpm->FetchPageBasic(possible_prev_page_id));
if (prev_page_guard.As<PageType>()->data.key_count > _ActualDataType::kMinNumberOfKeysForLeaf) {
// borrow from prev
// first, set the past-the-end pointer
page_guard.template AsMut<PageType>()
->data.p_data[page_guard.template As<PageType>()->data.key_count + 1]
.second =
page_guard.template As<PageType>()->data.p_data[page_guard.template As<PageType>()->data.key_count].second;
memmove(page_guard.template AsMut<PageType>()->data.p_data + 1, page_guard.template As<PageType>()->data.p_data,
page_guard.template As<PageType>()->data.key_count * sizeof(key_index_pair_t));
page_guard.template AsMut<PageType>()->data.p_data[0] =
prev_page_guard.template As<PageType>()
->data.p_data[prev_page_guard.template As<PageType>()->data.key_count - 1];
page_guard.template AsMut<PageType>()->data.key_count++;
prev_page_guard.template AsMut<PageType>()->data.key_count--;
parent_page_guard.template AsMut<PageType>()->data.p_data[pos.path[pos.path.size() - 2].second - 1].first =
prev_page_guard.template As<PageType>()
->data.p_data[prev_page_guard.template As<PageType>()->data.key_count - 1]
.first;
return;
}
// now we have no choice but to merge self into prev
memmove(prev_page_guard.template AsMut<PageType>()->data.p_data +
prev_page_guard.template As<PageType>()->data.key_count,
page_guard.template As<PageType>()->data.p_data,
page_guard.template As<PageType>()->data.key_count * sizeof(key_index_pair_t));
prev_page_guard.template AsMut<PageType>()->data.key_count += page_guard.template As<PageType>()->data.key_count;
prev_page_guard.template AsMut<PageType>()
->data.p_data[prev_page_guard.template As<PageType>()->data.key_count]
.second =
page_guard.template As<PageType>()->data.p_data[page_guard.template As<PageType>()->data.key_count].second;
parent_page_guard.template AsMut<PageType>()->data.p_data[pos.path[pos.path.size() - 2].second - 1].first =
prev_page_guard.template As<PageType>()
->data.p_data[prev_page_guard.template As<PageType>()->data.key_count - 1]
.first;
pos.path.pop_back(); // page_guard is no longer valid
RemoveEntryInRightSkewPath(pos);
return;
}
// Removes the entry addressed by `pos` and repairs the tree when the page underflows.
//
// `pos.path` is a root-to-page list of (page guard, slot index) pairs; the entry removed is
// slot `pos.path.back().second` of page `pos.path.back().first`.
// `is_fixing_up_recursive` is passed as true by the recursive calls below (removing a
// parent's entry after a merge); it is read only by the extra-pointer fixups in the
// "borrow from next" and "merge into next" branches.
// `siz` is not modified here; Remove() decrements it after calling this function.
// Throws std::runtime_error when an underflowing page has neither a previous nor a next sibling.
void RemoveEntryAt(PositionSignType &pos, bool is_fixing_up_recursive = false) {
if (siz == 1) {
// special case for the last entry: drop the root page and mark the tree as empty
// NOTE(review): pos.path still holds its page guards at this point, whereas the root
// branch further down clears pos.path before calling DeletePage — confirm DeletePage
// behaves as intended here.
bpm->DeletePage(root_page_id);
root_page_id = 0;
return;
}
auto &page_guard = pos.path.back().first;
// Decided BEFORE the removal: will the page still be acceptable after losing one entry?
bool has_enough_keys = false;
if (page_guard.template As<PageType>()->data.key_count > _ActualDataType::kMinNumberOfKeysForLeaf ||
(page_guard.template As<PageType>()->data.page_status & PageStatusType::ROOT) != 0) {
// case 1: the page has enough keys (or carries the ROOT flag)
has_enough_keys = true;
}
// Close the gap: shift every entry after the removed slot one position to the left.
memmove(
page_guard.template AsMut<PageType>()->data.p_data + pos.path.back().second,
page_guard.template As<PageType>()->data.p_data + pos.path.back().second + 1,
(page_guard.template As<PageType>()->data.key_count - pos.path.back().second - 1) * sizeof(key_index_pair_t));
page_guard.template AsMut<PageType>()->data.key_count--;
if (has_enough_keys) {
// NOTE(review): this test is true for any ROOT page that still has at least one key
// after the removal; confirm that this is the intended trigger for replacing the root.
if (page_guard.template As<PageType>()->data.page_status & PageStatusType::ROOT &&
page_guard.template As<PageType>()->data.key_count) {
// special case for the root page: slot 0's pointer becomes the new root id
root_page_id = page_guard.template As<PageType>()->data.p_data[0].second;
page_id_t page_to_delete = page_guard.PageId();
pos.path.clear();  // all page_guards are invalid now
bpm->DeletePage(page_to_delete);
return;
}
return;
}
// From here on the page has underflowed and is not the root, so a parent entry must exist.
assert(pos.path.size() >= 2);
// First, check if we can borrow from siblings. If we can, we just borrow from siblings. Otherwise, we just merge.
// A sibling page id of 0 is used below to mean "no such sibling".
page_id_t possible_prev_page_id = 0, possible_next_page_id = 0;
auto &parent_page_guard = pos.path[pos.path.size() - 2].first;
bool is_in_right_skew_path = false;
// if (pos.path[pos.path.size() - 2].second == parent_page_guard.template As<PageType>()->data.key_count) {
//   is_in_right_skew_path = true;
// }
// The parent counts as being on the "right skew path" when it is the first element of the
// path, or when the grandparent's slot index equals the grandparent's key_count (i.e. the
// parent was reached through the slot just past the grandparent's last key).
if (pos.path.size() == 2 || pos.path[pos.path.size() - 3].second ==
pos.path[pos.path.size() - 3].first.template As<PageType>()->data.key_count) {
is_in_right_skew_path = true;
}
if (is_in_right_skew_path) {
// On this path the parent has one more pointer than keys, so slot key_count is also a
// valid "next" candidate; when slot + 1 does not fit into p_data the pointer is read from p_n.
if (pos.path[pos.path.size() - 2].second < parent_page_guard.template As<PageType>()->data.key_count) {
if (pos.path[pos.path.size() - 2].second + 1 < _ActualDataType::kMaxKeyCount)
possible_next_page_id =
parent_page_guard.template As<PageType>()->data.p_data[pos.path[pos.path.size() - 2].second + 1].second;
else
possible_next_page_id = parent_page_guard.template As<PageType>()->data.p_n;
}
} else {
// Otherwise only slots 0 .. key_count - 1 of the parent are considered.
if (pos.path[pos.path.size() - 2].second < parent_page_guard.template As<PageType>()->data.key_count - 1)
possible_next_page_id =
parent_page_guard.template As<PageType>()->data.p_data[pos.path[pos.path.size() - 2].second + 1].second;
}
if (pos.path[pos.path.size() - 2].second > 0)
possible_prev_page_id =
parent_page_guard.template As<PageType>()->data.p_data[pos.path[pos.path.size() - 2].second - 1].second;
// Attempt 1: take the last entry of the previous sibling.
if (possible_prev_page_id != 0) {
BasicPageGuard prev_page_guard = std::move(bpm->FetchPageBasic(possible_prev_page_id));
if (prev_page_guard.As<PageType>()->data.key_count > _ActualDataType::kMinNumberOfKeysForLeaf) {
// borrow from prev: make room at slot 0, then copy prev's last entry into it
memmove(page_guard.template AsMut<PageType>()->data.p_data + 1, page_guard.template As<PageType>()->data.p_data,
page_guard.template As<PageType>()->data.key_count * sizeof(key_index_pair_t));
page_guard.template AsMut<PageType>()->data.p_data[0] =
prev_page_guard.template As<PageType>()
->data.p_data[prev_page_guard.template As<PageType>()->data.key_count - 1];
page_guard.template AsMut<PageType>()->data.key_count++;
prev_page_guard.template AsMut<PageType>()->data.key_count--;
// prev's largest key changed, so refresh the key stored for prev in the parent
parent_page_guard.template AsMut<PageType>()->data.p_data[pos.path[pos.path.size() - 2].second - 1].first =
prev_page_guard.template As<PageType>()
->data.p_data[prev_page_guard.template As<PageType>()->data.key_count - 1]
.first;
return;
}
}
// Attempt 2: take the first entry of the next sibling.
if (possible_next_page_id != 0) {
BasicPageGuard next_page_guard = std::move(bpm->FetchPageBasic(possible_next_page_id));
if (next_page_guard.As<PageType>()->data.key_count > _ActualDataType::kMinNumberOfKeysForLeaf) {
// borrow from next: append next's first entry to this page
page_guard.template AsMut<PageType>()->data.p_data[page_guard.template As<PageType>()->data.key_count] =
next_page_guard.template As<PageType>()->data.p_data[0];
page_guard.template AsMut<PageType>()->data.key_count++;
next_page_guard.template AsMut<PageType>()->data.key_count--;
// this page's largest key changed, so refresh its key in the parent
parent_page_guard.template AsMut<PageType>()->data.p_data[pos.path[pos.path.size() - 2].second].first =
page_guard.template As<PageType>()
->data.p_data[page_guard.template As<PageType>()->data.key_count - 1]
.first;
// key_count was already decremented, so this moves exactly the remaining entries left by one
memmove(next_page_guard.template AsMut<PageType>()->data.p_data,
next_page_guard.template As<PageType>()->data.p_data + 1,
next_page_guard.template As<PageType>()->data.key_count * sizeof(key_index_pair_t));
// Only during recursive fix-up, and only when next sits in the parent's slot key_count,
// next's extra pointer is moved down as well.
if (is_fixing_up_recursive &&
pos.path[pos.path.size() - 2].second + 1 == parent_page_guard.template As<PageType>()->data.key_count) {
if (next_page_guard.template As<PageType>()->data.key_count == _ActualDataType::kMaxKeyCount - 1) {
// special process for meaningful p_n
next_page_guard.template AsMut<PageType>()
->data.p_data[next_page_guard.template As<PageType>()->data.key_count]
.second = next_page_guard.template As<PageType>()->data.p_n;
} else {
// special process for past-the-end p_i
next_page_guard.template AsMut<PageType>()
->data.p_data[next_page_guard.template As<PageType>()->data.key_count]
.second = next_page_guard.template As<PageType>()
->data.p_data[next_page_guard.template As<PageType>()->data.key_count + 1]
.second;
}
}
return;
}
}
// Neither sibling can spare an entry: merge, preferring the previous sibling.
if (possible_prev_page_id != 0) {
// merge self into prev: prev takes over p_n and receives all of this page's entries at its end
BasicPageGuard prev_page_guard = std::move(bpm->FetchPageBasic(possible_prev_page_id));
prev_page_guard.template AsMut<PageType>()->data.p_n = page_guard.template As<PageType>()->data.p_n;
memmove(prev_page_guard.template AsMut<PageType>()->data.p_data +
prev_page_guard.template As<PageType>()->data.key_count,
page_guard.template As<PageType>()->data.p_data,
page_guard.template As<PageType>()->data.key_count * sizeof(key_index_pair_t));
prev_page_guard.template AsMut<PageType>()->data.key_count += page_guard.template As<PageType>()->data.key_count;
// prev's largest key is now this page's former largest key
parent_page_guard.template AsMut<PageType>()->data.p_data[pos.path[pos.path.size() - 2].second - 1].first =
prev_page_guard.template As<PageType>()
->data.p_data[prev_page_guard.template As<PageType>()->data.key_count - 1]
.first;
page_id_t current_page_id = page_guard.PageId();
pos.path.pop_back();  // page_guard is no longer valid
// pos now ends at the parent slot that pointed to the merged-away page
if (!is_in_right_skew_path) {
bpm->DeletePage(current_page_id);
// we need to update the parent page
RemoveEntryAt(pos, true);
return;
}
// NOTE(review): current_page_id is not passed to DeletePage on this branch — confirm
// the merged-away page is released elsewhere.
RemoveEntryInRightSkewPath(pos);
return;
}
if (possible_next_page_id != 0) {
// merge self into next
assert(possible_prev_page_id == 0);
BasicPageGuard next_page_guard = std::move(bpm->FetchPageBasic(possible_next_page_id));
if (is_fixing_up_recursive &&
pos.path[pos.path.size() - 2].second + 1 == parent_page_guard.template As<PageType>()->data.key_count) {
// the next page has past-the-end pointer
// NOTE(review): both branches index with kMaxKeyCount rather than with next's
// key_count — confirm these are the intended source/destination slots.
if (next_page_guard.template As<PageType>()->data.key_count == _ActualDataType::kMaxKeyCount) {
next_page_guard.template AsMut<PageType>()->data.p_n =
next_page_guard.template As<PageType>()->data.p_data[_ActualDataType::kMaxKeyCount - 1].second;
} else {
next_page_guard.template AsMut<PageType>()->data.p_data[_ActualDataType::kMaxKeyCount].second =
next_page_guard.template As<PageType>()->data.p_data[_ActualDataType::kMaxKeyCount - 1].second;
}
}
// Shift next's entries right to open a gap of this page's size ...
memmove(
next_page_guard.template AsMut<PageType>()->data.p_data + page_guard.template As<PageType>()->data.key_count,
next_page_guard.template As<PageType>()->data.p_data,
next_page_guard.template As<PageType>()->data.key_count * sizeof(key_index_pair_t));
// ... and copy this page's entries into the gap at the front.
memmove(next_page_guard.template AsMut<PageType>()->data.p_data, page_guard.template As<PageType>()->data.p_data,
page_guard.template As<PageType>()->data.key_count * sizeof(key_index_pair_t));
next_page_guard.template AsMut<PageType>()->data.key_count += page_guard.template As<PageType>()->data.key_count;
page_id_t current_page_id = page_guard.PageId();
pos.path.pop_back();  // page_guard is no longer valid
// pos now ends at the parent slot that pointed to the merged-away page
if (!is_in_right_skew_path) {
bpm->DeletePage(current_page_id);
// we need to update the parent page
RemoveEntryAt(pos, true);
return;
}
// NOTE(review): as in the merge-into-prev case, current_page_id is not deleted on this branch.
RemoveEntryInRightSkewPath(pos);
return;
}
// An underflowing non-root page with neither sibling is not handled.
throw std::runtime_error("No sibling found!");
} }
public: public:
@ -493,6 +713,7 @@ class BPlusTreeIndexer {
return false; return false;
} }
InsertEntryAt(pos, key, value); InsertEntryAt(pos, key, value);
++siz;
return true; return true;
} }
bool Remove(const KeyType &key) { // Finish Design bool Remove(const KeyType &key) { // Finish Design
@ -504,6 +725,7 @@ class BPlusTreeIndexer {
if (key_cmp(key, pos.path.back().first.template As<PageType>()->data.p_data[pos.path.back().second].first)) if (key_cmp(key, pos.path.back().first.template As<PageType>()->data.p_data[pos.path.back().second].first))
return false; return false;
RemoveEntryAt(pos); RemoveEntryAt(pos);
--siz;
return true; return true;
} }
size_t Size() { return siz; } // Finish Design size_t Size() { return siz; } // Finish Design

View File

@ -22,6 +22,7 @@
基本参考:<https://en.wikipedia.org/wiki/B%2B_tree> 基本参考:<https://en.wikipedia.org/wiki/B%2B_tree>
- p[i]子树中的所有key K都满足 k[i-1] \< K \<= k[i]且k[i]一定能取到即直接无缝对接lower_bound - p[i]子树中的所有key K都满足 k[i-1] \< K \<= k[i]且k[i]一定能取到即直接无缝对接lower_bound
- 对外接口提供类似于迭代器的东西,但该迭代器只支持向后单向移动、读取value值、修改value值,并且迭代器会保留PageGuard,因此如果B+树在迭代器之前析构,会出现访问越界。 - 对外接口提供类似于迭代器的东西,但该迭代器只支持向后单向移动、读取value值、修改value值,并且迭代器会保留PageGuard,因此如果B+树在迭代器之前析构,会出现访问越界。
- 由于子区间**左开右闭**,于是绝大多数Internal Page和Leaf Page一样,都没有尾后指针;整棵树的左下角会有一大片的leaf like pages,它们都有个共同特性,即指针数量和键值数量相同,但真正的leaf page还需要额外维护page状态标号和p_n指针。
# UI设计 # UI设计
- 语言Python - 语言Python

View File

@ -21,6 +21,12 @@ class FixLengthString {
} }
return false; return false;
} }
// Element-wise equality: true only when all `length` elements of `data` match those of `that`.
bool operator==(const FixLengthString<length> &that) const {
  size_t idx = 0;
  // Walk the matching prefix and stop at the first element that differs.
  while (idx < length && !(data[idx] != that.data[idx])) {
    ++idx;
  }
  // Reaching `length` means no mismatch was encountered.
  return idx == length;
}
}; };
} // namespace bpt_basic_test } // namespace bpt_basic_test
TEST(BasicTest, Compile) { // This Test only test the compile of the code TEST(BasicTest, Compile) { // This Test only test the compile of the code
@ -280,10 +286,13 @@ TEST(HarderTest, Split_in_Put_Harder_1) {
for (int i = 1; i <= ops; i++) { for (int i = 1; i <= ops; i++) {
bpt.Put(keys[i - 1], i + 3); bpt.Put(keys[i - 1], i + 3);
ASSERT_EQ(bpt.Get(keys[i - 1]), i + 3); ASSERT_EQ(bpt.Get(keys[i - 1]), i + 3);
ASSERT_EQ(bpt.Size(), i);
} }
ASSERT_EQ(bpt.Size(), ops);
for (int i = 1; i <= ops; i++) { for (int i = 1; i <= ops; i++) {
ASSERT_EQ(bpt.Get(keys[i - 1]), i + 3); ASSERT_EQ(bpt.Get(keys[i - 1]), i + 3);
} }
ASSERT_EQ(bpt.Size(), ops);
} }
delete bpm; delete bpm;
delete dm; delete dm;
@ -291,9 +300,11 @@ TEST(HarderTest, Split_in_Put_Harder_1) {
bpm = new BufferPoolManager(20, 3, dm); bpm = new BufferPoolManager(20, 3, dm);
{ {
BPlusTreeIndexer<KeyType, std::less<KeyType>> bpt(bpm); BPlusTreeIndexer<KeyType, std::less<KeyType>> bpt(bpm);
ASSERT_EQ(bpt.Size(), ops);
for (int i = 1; i <= ops; i++) { for (int i = 1; i <= ops; i++) {
ASSERT_EQ(bpt.Get(keys[i - 1]), i + 3); ASSERT_EQ(bpt.Get(keys[i - 1]), i + 3);
} }
ASSERT_EQ(bpt.Size(), ops);
} }
delete bpm; delete bpm;
delete dm; delete dm;
@ -324,10 +335,13 @@ TEST(HarderTest, Split_in_Put_Harder_2) {
for (int i = 1; i <= ops; i++) { for (int i = 1; i <= ops; i++) {
bpt.Put(keys[i - 1], i + 3); bpt.Put(keys[i - 1], i + 3);
ASSERT_EQ(bpt.Get(keys[i - 1]), i + 3); ASSERT_EQ(bpt.Get(keys[i - 1]), i + 3);
ASSERT_EQ(bpt.Size(), i);
} }
ASSERT_EQ(bpt.Size(), ops);
for (int i = 1; i <= ops; i++) { for (int i = 1; i <= ops; i++) {
ASSERT_EQ(bpt.Get(keys[i - 1]), i + 3); ASSERT_EQ(bpt.Get(keys[i - 1]), i + 3);
} }
ASSERT_EQ(bpt.Size(), ops);
} }
delete bpm; delete bpm;
delete dm; delete dm;
@ -335,9 +349,11 @@ TEST(HarderTest, Split_in_Put_Harder_2) {
bpm = new BufferPoolManager(20, 3, dm); bpm = new BufferPoolManager(20, 3, dm);
{ {
BPlusTreeIndexer<KeyType, std::less<KeyType>> bpt(bpm); BPlusTreeIndexer<KeyType, std::less<KeyType>> bpt(bpm);
ASSERT_EQ(bpt.Size(), ops);
for (int i = 1; i <= ops; i++) { for (int i = 1; i <= ops; i++) {
ASSERT_EQ(bpt.Get(keys[i - 1]), i + 3); ASSERT_EQ(bpt.Get(keys[i - 1]), i + 3);
} }
ASSERT_EQ(bpt.Size(), ops);
} }
delete bpm; delete bpm;
delete dm; delete dm;
@ -370,10 +386,13 @@ TEST(HarderTest, Split_in_Put_Harder_3) {
for (int i = 1; i <= ops; i++) { for (int i = 1; i <= ops; i++) {
bpt.Put(keys[i - 1], i + 3); bpt.Put(keys[i - 1], i + 3);
ASSERT_EQ(bpt.Get(keys[i - 1]), i + 3); ASSERT_EQ(bpt.Get(keys[i - 1]), i + 3);
ASSERT_EQ(bpt.Size(), i);
} }
ASSERT_EQ(bpt.Size(), ops);
for (int i = 1; i <= ops; i++) { for (int i = 1; i <= ops; i++) {
ASSERT_EQ(bpt.Get(keys[i - 1]), i + 3); ASSERT_EQ(bpt.Get(keys[i - 1]), i + 3);
} }
ASSERT_EQ(bpt.Size(), ops);
} }
delete bpm; delete bpm;
delete dm; delete dm;
@ -381,9 +400,11 @@ TEST(HarderTest, Split_in_Put_Harder_3) {
bpm = new BufferPoolManager(20, 3, dm); bpm = new BufferPoolManager(20, 3, dm);
{ {
BPlusTreeIndexer<KeyType, std::less<KeyType>> bpt(bpm); BPlusTreeIndexer<KeyType, std::less<KeyType>> bpt(bpm);
ASSERT_EQ(bpt.Size(), ops);
for (int i = 1; i <= ops; i++) { for (int i = 1; i <= ops; i++) {
ASSERT_EQ(bpt.Get(keys[i - 1]), i + 3); ASSERT_EQ(bpt.Get(keys[i - 1]), i + 3);
} }
ASSERT_EQ(bpt.Size(), ops);
} }
delete bpm; delete bpm;
delete dm; delete dm;
@ -416,10 +437,13 @@ TEST(HarderTest, Split_in_Put_Harder_4) {
for (int i = 1; i <= ops; i++) { for (int i = 1; i <= ops; i++) {
bpt.Put(keys[i - 1], i + 3); bpt.Put(keys[i - 1], i + 3);
ASSERT_EQ(bpt.Get(keys[i - 1]), i + 3); ASSERT_EQ(bpt.Get(keys[i - 1]), i + 3);
ASSERT_EQ(bpt.Size(), i);
} }
ASSERT_EQ(bpt.Size(), ops);
for (int i = 1; i <= ops; i++) { for (int i = 1; i <= ops; i++) {
ASSERT_EQ(bpt.Get(keys[i - 1]), i + 3); ASSERT_EQ(bpt.Get(keys[i - 1]), i + 3);
} }
ASSERT_EQ(bpt.Size(), ops);
} }
delete bpm; delete bpm;
delete dm; delete dm;
@ -427,9 +451,11 @@ TEST(HarderTest, Split_in_Put_Harder_4) {
bpm = new BufferPoolManager(20, 3, dm); bpm = new BufferPoolManager(20, 3, dm);
{ {
BPlusTreeIndexer<KeyType, std::less<KeyType>> bpt(bpm); BPlusTreeIndexer<KeyType, std::less<KeyType>> bpt(bpm);
ASSERT_EQ(bpt.Size(), ops);
for (int i = 1; i <= ops; i++) { for (int i = 1; i <= ops; i++) {
ASSERT_EQ(bpt.Get(keys[i - 1]), i + 3); ASSERT_EQ(bpt.Get(keys[i - 1]), i + 3);
} }
ASSERT_EQ(bpt.Size(), ops);
} }
delete bpm; delete bpm;
delete dm; delete dm;
@ -462,10 +488,13 @@ TEST(HarderTest, Split_in_Put_Harder_5) {
for (int i = 1; i <= ops; i++) { for (int i = 1; i <= ops; i++) {
bpt.Put(keys[i - 1], i + 3); bpt.Put(keys[i - 1], i + 3);
ASSERT_EQ(bpt.Get(keys[i - 1]), i + 3); ASSERT_EQ(bpt.Get(keys[i - 1]), i + 3);
ASSERT_EQ(bpt.Size(), i);
} }
ASSERT_EQ(bpt.Size(), ops);
for (int i = 1; i <= ops; i++) { for (int i = 1; i <= ops; i++) {
ASSERT_EQ(bpt.Get(keys[i - 1]), i + 3); ASSERT_EQ(bpt.Get(keys[i - 1]), i + 3);
} }
ASSERT_EQ(bpt.Size(), ops);
} }
delete bpm; delete bpm;
delete dm; delete dm;
@ -473,9 +502,11 @@ TEST(HarderTest, Split_in_Put_Harder_5) {
bpm = new BufferPoolManager(20, 3, dm); bpm = new BufferPoolManager(20, 3, dm);
{ {
BPlusTreeIndexer<KeyType, std::less<KeyType>> bpt(bpm); BPlusTreeIndexer<KeyType, std::less<KeyType>> bpt(bpm);
ASSERT_EQ(bpt.Size(), ops);
for (int i = 1; i <= ops; i++) { for (int i = 1; i <= ops; i++) {
ASSERT_EQ(bpt.Get(keys[i - 1]), i + 3); ASSERT_EQ(bpt.Get(keys[i - 1]), i + 3);
} }
ASSERT_EQ(bpt.Size(), ops);
} }
delete bpm; delete bpm;
delete dm; delete dm;
@ -508,10 +539,13 @@ TEST(HarderTest, Split_in_Put_Harder_6) {
for (int i = 1; i <= ops; i++) { for (int i = 1; i <= ops; i++) {
bpt.Put(keys[i - 1], i + 3); bpt.Put(keys[i - 1], i + 3);
ASSERT_EQ(bpt.Get(keys[i - 1]), i + 3); ASSERT_EQ(bpt.Get(keys[i - 1]), i + 3);
ASSERT_EQ(bpt.Size(), i);
} }
ASSERT_EQ(bpt.Size(), ops);
for (int i = 1; i <= ops; i++) { for (int i = 1; i <= ops; i++) {
ASSERT_EQ(bpt.Get(keys[i - 1]), i + 3); ASSERT_EQ(bpt.Get(keys[i - 1]), i + 3);
} }
ASSERT_EQ(bpt.Size(), ops);
} }
delete bpm; delete bpm;
delete dm; delete dm;
@ -519,9 +553,11 @@ TEST(HarderTest, Split_in_Put_Harder_6) {
bpm = new BufferPoolManager(20, 3, dm); bpm = new BufferPoolManager(20, 3, dm);
{ {
BPlusTreeIndexer<KeyType, std::less<KeyType>> bpt(bpm); BPlusTreeIndexer<KeyType, std::less<KeyType>> bpt(bpm);
ASSERT_EQ(bpt.Size(), ops);
for (int i = 1; i <= ops; i++) { for (int i = 1; i <= ops; i++) {
ASSERT_EQ(bpt.Get(keys[i - 1]), i + 3); ASSERT_EQ(bpt.Get(keys[i - 1]), i + 3);
} }
ASSERT_EQ(bpt.Size(), ops);
} }
delete bpm; delete bpm;
delete dm; delete dm;
@ -554,10 +590,13 @@ TEST(HarderTest, Split_in_Put_Harder_7) {
for (int i = 1; i <= ops; i++) { for (int i = 1; i <= ops; i++) {
bpt.Put(keys[i - 1], i + 3); bpt.Put(keys[i - 1], i + 3);
ASSERT_EQ(bpt.Get(keys[i - 1]), i + 3); ASSERT_EQ(bpt.Get(keys[i - 1]), i + 3);
ASSERT_EQ(bpt.Size(), i);
} }
ASSERT_EQ(bpt.Size(), ops);
for (int i = 1; i <= ops; i++) { for (int i = 1; i <= ops; i++) {
ASSERT_EQ(bpt.Get(keys[i - 1]), i + 3); ASSERT_EQ(bpt.Get(keys[i - 1]), i + 3);
} }
ASSERT_EQ(bpt.Size(), ops);
} }
delete bpm; delete bpm;
delete dm; delete dm;
@ -565,9 +604,11 @@ TEST(HarderTest, Split_in_Put_Harder_7) {
bpm = new BufferPoolManager(20, 3, dm); bpm = new BufferPoolManager(20, 3, dm);
{ {
BPlusTreeIndexer<KeyType, std::less<KeyType>> bpt(bpm); BPlusTreeIndexer<KeyType, std::less<KeyType>> bpt(bpm);
ASSERT_EQ(bpt.Size(), ops);
for (int i = 1; i <= ops; i++) { for (int i = 1; i <= ops; i++) {
ASSERT_EQ(bpt.Get(keys[i - 1]), i + 3); ASSERT_EQ(bpt.Get(keys[i - 1]), i + 3);
} }
ASSERT_EQ(bpt.Size(), ops);
} }
delete bpm; delete bpm;
delete dm; delete dm;
@ -600,10 +641,13 @@ TEST(HarderTest, Split_in_Put_Harder_8) {
for (int i = 1; i <= ops; i++) { for (int i = 1; i <= ops; i++) {
bpt.Put(keys[i - 1], i + 3); bpt.Put(keys[i - 1], i + 3);
ASSERT_EQ(bpt.Get(keys[i - 1]), i + 3); ASSERT_EQ(bpt.Get(keys[i - 1]), i + 3);
ASSERT_EQ(bpt.Size(), i);
} }
ASSERT_EQ(bpt.Size(), ops);
for (int i = 1; i <= ops; i++) { for (int i = 1; i <= ops; i++) {
ASSERT_EQ(bpt.Get(keys[i - 1]), i + 3); ASSERT_EQ(bpt.Get(keys[i - 1]), i + 3);
} }
ASSERT_EQ(bpt.Size(), ops);
} }
delete bpm; delete bpm;
delete dm; delete dm;
@ -611,9 +655,11 @@ TEST(HarderTest, Split_in_Put_Harder_8) {
bpm = new BufferPoolManager(20, 3, dm); bpm = new BufferPoolManager(20, 3, dm);
{ {
BPlusTreeIndexer<KeyType, std::less<KeyType>> bpt(bpm); BPlusTreeIndexer<KeyType, std::less<KeyType>> bpt(bpm);
ASSERT_EQ(bpt.Size(), ops);
for (int i = 1; i <= ops; i++) { for (int i = 1; i <= ops; i++) {
ASSERT_EQ(bpt.Get(keys[i - 1]), i + 3); ASSERT_EQ(bpt.Get(keys[i - 1]), i + 3);
} }
ASSERT_EQ(bpt.Size(), ops);
} }
delete bpm; delete bpm;
delete dm; delete dm;
@ -623,7 +669,7 @@ TEST(HarderTest, Split_in_Put_Harder_9) {
std::vector<std::pair<int, int>> entries; std::vector<std::pair<int, int>> entries;
const int kNumberOfKeys = 100000; const int kNumberOfKeys = 100000;
const unsigned int RndSeed = testing::GTEST_FLAG(random_seed); const unsigned int RndSeed = testing::GTEST_FLAG(random_seed);
std::mt19937 rnd(RndSeed); std::mt19937 rnd(1);
for (int i = 0; i < kNumberOfKeys; i++) { for (int i = 0; i < kNumberOfKeys; i++) {
entries.push_back({i + 3, rnd()}); entries.push_back({i + 3, rnd()});
} }
@ -636,11 +682,14 @@ TEST(HarderTest, Split_in_Put_Harder_9) {
for (int i = 0; i < kNumberOfKeys; i++) { for (int i = 0; i < kNumberOfKeys; i++) {
bpt.Put(entries[i].first, entries[i].second); bpt.Put(entries[i].first, entries[i].second);
ASSERT_EQ(bpt.Get(entries[i].first), entries[i].second); ASSERT_EQ(bpt.Get(entries[i].first), entries[i].second);
ASSERT_EQ(bpt.Size(), i + 1);
} }
ASSERT_EQ(bpt.Size(), kNumberOfKeys);
std::shuffle(entries.begin(), entries.end(), rnd); std::shuffle(entries.begin(), entries.end(), rnd);
for (int i = 0; i < kNumberOfKeys; i++) { for (int i = 0; i < kNumberOfKeys; i++) {
ASSERT_EQ(bpt.Get(entries[i].first), entries[i].second); ASSERT_EQ(bpt.Get(entries[i].first), entries[i].second);
} }
ASSERT_EQ(bpt.Size(), kNumberOfKeys);
} }
delete bpm; delete bpm;
delete dm; delete dm;
@ -649,9 +698,78 @@ TEST(HarderTest, Split_in_Put_Harder_9) {
std::shuffle(entries.begin(), entries.end(), rnd); std::shuffle(entries.begin(), entries.end(), rnd);
{ {
BPlusTreeIndexer<long long, std::less<long long>> bpt(bpm); BPlusTreeIndexer<long long, std::less<long long>> bpt(bpm);
ASSERT_EQ(bpt.Size(), kNumberOfKeys);
for (int i = 0; i < kNumberOfKeys; i++) { for (int i = 0; i < kNumberOfKeys; i++) {
ASSERT_EQ(bpt.Get(entries[i].first), entries[i].second); ASSERT_EQ(bpt.Get(entries[i].first), entries[i].second);
} }
ASSERT_EQ(bpt.Size(), kNumberOfKeys);
}
delete bpm;
delete dm;
dm = new DiskManager("/tmp/bpt14.db");
bpm = new BufferPoolManager(20, 3, dm);
{
BPlusTreeIndexer<long long, std::less<long long>> bpt(bpm);
ASSERT_EQ(bpt.Size(), kNumberOfKeys);
sort(entries.begin(), entries.end());
auto it_std = entries.begin();
auto it_bpt = bpt.lower_bound_const(entries[0].first);
for (int i = 0; i < kNumberOfKeys; i++) {
ASSERT_EQ(it_bpt.GetKey(), it_std->first);
ASSERT_EQ(it_bpt.GetValue(), it_std->second);
++it_bpt;
it_std++;
}
ASSERT_TRUE(it_bpt == bpt.end_const());
ASSERT_EQ(bpt.Size(), kNumberOfKeys);
}
delete bpm;
delete dm;
}
TEST(RemoveTest, RM_1) {
const unsigned int RndSeed = testing::GTEST_FLAG(random_seed);
std::mt19937 rnd(RndSeed);
const int str_len = 800;
typedef bpt_basic_test::FixLengthString<str_len> KeyType;
// fprintf(stderr, "sizeof(std::pair<KeyType, default_numeric_index_t>)=%lu\n",
// sizeof(std::pair<KeyType, default_numeric_index_t>));
const std::string db_file_name = "/tmp/bpt15.db";
remove(db_file_name.c_str());
std::vector<KeyType> keys;
const int max_keys = 10;
for (int i = 1; i <= max_keys; i++) {
KeyType key;
for (size_t j = 0; j < str_len; j++) key.data[j] = 'a' + rnd() % 26;
key.data[6] = '\0';
keys.push_back(key);
}
std::sort(keys.begin(), keys.end());
DiskManager *dm = new DiskManager(db_file_name.c_str());
BufferPoolManager *bpm = new BufferPoolManager(20, 3, dm);
{
BPlusTreeIndexer<KeyType, std::less<KeyType>> bpt(bpm);
bpt.Put(keys[1], 4);
bpt.Put(keys[0], 3);
bpt.Put(keys[2], 5);
bpt.Put(keys[3], 6);
bpt.Put(keys[4], 7);
bpt.Remove(keys[1]);
ASSERT_EQ(bpt.Size(), 4);
auto it = bpt.lower_bound_const(keys[0]);
ASSERT_EQ(it.GetKey(), keys[0]);
ASSERT_EQ(it.GetValue(), 3);
++it;
ASSERT_EQ(it.GetKey(), keys[2]);
ASSERT_EQ(it.GetValue(), 5);
++it;
ASSERT_EQ(it.GetKey(), keys[3]);
ASSERT_EQ(it.GetValue(), 6);
++it;
ASSERT_EQ(it.GetKey(), keys[4]);
ASSERT_EQ(it.GetValue(), 7);
++it;
ASSERT_TRUE(it == bpt.end_const());
} }
delete bpm; delete bpm;
delete dm; delete dm;

88
test/t1_std.cpp Normal file
View File

@ -0,0 +1,88 @@
// This program is only used for differential testing (cross-checking the output of the real implementation).
#include <cstdint>
#include <cstdio>
#include <cstring>
#include <fstream>
#include <iostream>
#include <set>
#include <string>
#include <unordered_map>
#include "bpt/disk_manager.h"
using hash_t = uint64_t;

/**
 * @brief Salted 64-bit string hash built on the splitmix64 finalizer.
 *
 * The input is wrapped as salt1 + str + salt2, then consumed in 8-byte words
 * (each XOR-ed with an 8-byte window of inner_salt) followed by a byte-wise
 * tail. After every word/byte the splitmix64 mixing step is applied.
 *
 * Words are loaded in native byte order, so the result depends on the host's
 * endianness. Tail bytes are XOR-ed as `char`, so bytes >= 0x80 sign-extend
 * on platforms where `char` is signed.
 *
 * @param str Key to hash (taken by value because it is salted in place).
 * @return The 64-bit hash of the salted key.
 */
inline hash_t Hash(std::string str) noexcept {
  constexpr static char salt1[10] = "mL;]-=eT";
  constexpr static char salt2[10] = "9B<mF_me";
  constexpr static char inner_salt[17] = "si9aW@zl#2$3%4^!";
  /* Reference: http://xorshift.di.unimi.it/splitmix64.c */
  const auto mix = [](hash_t v) noexcept -> hash_t {
    v += 0x9e3779b97f4a7c15;
    v = (v ^ (v >> 30)) * 0xbf58476d1ce4e5b9;
    v = (v ^ (v >> 27)) * 0x94d049bb133111eb;
    return v ^ (v >> 31);
  };

  str = salt1 + str + salt2;
  const char *const bytes = str.data();
  const std::size_t len = str.length();

  hash_t ret = 0;
  std::size_t i = 0;
  // Whole 8-byte words. memcpy is used instead of casting the char buffer to
  // hash_t*, which would violate strict aliasing and may be misaligned.
  for (; i + sizeof(hash_t) <= len; i += sizeof(hash_t)) {
    hash_t text_word;
    hash_t salt_word;
    std::memcpy(&text_word, bytes + i, sizeof(hash_t));
    // i is a multiple of 8 here, so (i & 15) is 0 or 8 and the 8-byte read
    // stays inside the 16 salt characters.
    std::memcpy(&salt_word, inner_salt + (i & 15), sizeof(hash_t));
    ret = mix(ret ^ text_word ^ salt_word);
  }
  // Remaining (< 8) bytes, one at a time.
  for (; i < len; ++i) {
    ret ^= str[i];
    ret ^= inner_salt[i & 15];
    ret = mix(ret);
  }
  return ret;
}
std::unordered_map<hash_t, std::set<int>> mp;
int main() {
std::fstream f("data.txt");
hash_t key;
int value;
while (f >> key >> value) {
mp[key].insert(value);
}
int n;
std::cin >> n;
std::string op;
while (n-- > 0) {
std::cin >> op;
if (op == "insert") {
std::string key;
int value;
std::cin >> key >> value;
mp[Hash(key)].insert(value);
} else if (op == "delete") {
std::string key;
int value;
std::cin >> key >> value;
hash_t hsh = Hash(key);
mp[hsh].erase(value);
if (mp[hsh].empty()) mp.erase(hsh);
} else if (op == "find") {
std::string key;
int value;
std::cin >> key;
hash_t hsh = Hash(key);
if (mp.find(hsh) == mp.end()) {
std::cout << "null";
} else {
for (auto &x : mp[hsh]) {
std::cout << x << ' ';
}
}
std::cout << '\n';
} else {
std::cout << "Unknown operation\n";
}
}
f.close();
remove("data.txt");
f.open("data.txt", std::ios::out);
for (auto &x : mp) {
for (auto &y : x.second) {
f << x.first << ' ' << y << '\n';
}
}
return 0;
}