From bf76de65909573ed386da9f49dcc97544a14e77d Mon Sep 17 00:00:00 2001 From: happyZYM Date: Mon, 29 Apr 2024 12:19:46 +0000 Subject: [PATCH] finish writing remove, ready to deeeeeeeeeeeeeeeeeeeeeeeeeebug --- CMakeLists.txt | 2 +- bpt/include/bpt/bpt.hpp | 272 ++++++++++++++++++++++++++++++++++++---- design.md | 1 + test/bpt_basic_test.cpp | 138 ++++++++++++++++++-- test/t1_std.cpp | 88 +++++++++++++ 5 files changed, 465 insertions(+), 36 deletions(-) create mode 100644 test/t1_std.cpp diff --git a/CMakeLists.txt b/CMakeLists.txt index 27e44e4..6d7a046 100644 --- a/CMakeLists.txt +++ b/CMakeLists.txt @@ -32,7 +32,7 @@ else() add_definitions(-DGIT_COMMIT_HASH="[developing]") endif() -# 设置一个布尔类型的选项,用于控制是否启用高级功能,如程序日志、并发、快照等 +# 设置一个布尔类型的选项,用于控制是否启用高级功能,如日志(业务日志、程序日志、容错校验日志)、并发、快照等 option(ENABLE_ADVANCED_FEATURE "Enable advanced features" OFF) option(OJ_TEST_BPT "Enable OJ test for B+ Tree" ON) option(OJ_TEST_BACKEND "Enable OJ test for backend" OFF) diff --git a/bpt/include/bpt/bpt.hpp b/bpt/include/bpt/bpt.hpp index 1251296..41c8527 100644 --- a/bpt/include/bpt/bpt.hpp +++ b/bpt/include/bpt/bpt.hpp @@ -64,7 +64,7 @@ class BPlusTreeIndexer { res.is_end = false; return res; } - void InsertFixUpLookPartA(PositionSignType &pos, BasicPageGuard &parent_page_guard, BasicPageGuard &new_page_guard, + void InsertFixUpLoopPartA(PositionSignType &pos, BasicPageGuard &parent_page_guard, BasicPageGuard &new_page_guard, BasicPageGuard &page_guard, default_numeric_index_t new_page_id) { pos.path[pos.path.size() - 2].second++; // now check we are able to "insert" (new_page_guard.template @@ -125,9 +125,9 @@ class BPlusTreeIndexer { new_page_id); KeyType key_to_update_backup = page_guard.template As()->data.p_data[page_guard.template As()->data.key_count - 1].first; - InsertFixUpLookPartB(pos, parent_page_guard, new_entry_backup, key_to_update_backup); + InsertFixUpLoopPartB(pos, parent_page_guard, new_entry_backup, key_to_update_backup); } - void InsertFixUpLookPartB(PositionSignType &pos, BasicPageGuard &page_guard, const key_index_pair_t &new_entry_backup, + void InsertFixUpLoopPartB(PositionSignType &pos, BasicPageGuard &page_guard, const key_index_pair_t &new_entry_backup, const KeyType &key_to_update_backup) { default_numeric_index_t new_page_id; auto new_page_guard = std::move(bpm->NewPageGuarded(&new_page_id)); @@ -195,7 +195,7 @@ class BPlusTreeIndexer { return; } auto &parent_page_guard = pos.path[pos.path.size() - 2].first; - InsertFixUpLookPartA(pos, parent_page_guard, new_page_guard, page_guard, new_page_id); + InsertFixUpLoopPartA(pos, parent_page_guard, new_page_guard, page_guard, new_page_id); } void InsertEntryAt(PositionSignType &pos, const KeyType &key, b_plus_tree_value_index_t value, bool is_fixing_up_recursive = false) { @@ -207,7 +207,6 @@ class BPlusTreeIndexer { new_page_guard.AsMut()->data.key_count = 1; new_page_guard.AsMut()->data.p_data[0] = std::make_pair(key, value); new_page_guard.AsMut()->data.p_n = 0; - if (!is_fixing_up_recursive) ++siz; return; } auto &page_guard = pos.path.back().first; @@ -220,7 +219,6 @@ class BPlusTreeIndexer { page_guard.template AsMut()->data.key_count++; // fprintf(stderr, "page_guard.template As()->data.key_count = %d\n", // (int)page_guard.template As()->data.key_count); - if (!is_fixing_up_recursive) ++siz; return; } // In our case, the tree is not too high, so we do not consider borrowing from siblings, we just split the page. @@ -233,7 +231,7 @@ class BPlusTreeIndexer { if (!is_fixing_up_recursive) new_page_guard.AsMut()->data.page_status = PageStatusType::LEAF; else - new_page_guard.AsMut()->data.page_status = 0; // PageStatusType::INTERNAL; + new_page_guard.AsMut()->data.page_status = PageStatusType::INTERNAL; new_page_guard.AsMut()->data.key_count = _ActualDataType::kMinNumberOfKeysForLeaf; page_guard.template AsMut()->data.key_count -= _ActualDataType::kMinNumberOfKeysForLeaf; if (!is_fixing_up_recursive) @@ -278,7 +276,6 @@ class BPlusTreeIndexer { page_guard.template As()->data.p_data[page_guard.template As()->data.key_count - 1].first, page_guard.PageId()); new_root_page_guard.AsMut()->data.p_data[1] = std::make_pair(KeyType(), new_page_id); - if (!is_fixing_up_recursive) ++siz; // fprintf(stderr, "new_page_guard.AsMut()->data.key_count = %d\n", // (int)new_page_guard.AsMut()->data.key_count); return; @@ -286,16 +283,15 @@ class BPlusTreeIndexer { assert(pos.path.size() >= 2); auto &parent_page_guard = pos.path[pos.path.size() - 2].first; bool is_in_right_skew_path = false; - if (pos.path[pos.path.size() - 2].second == parent_page_guard.template As()->data.key_count) { - is_in_right_skew_path = true; - } + // if (pos.path[pos.path.size() - 2].second == parent_page_guard.template As()->data.key_count) { + // is_in_right_skew_path = true; + // } if (pos.path.size() == 2 || pos.path[pos.path.size() - 3].second == pos.path[pos.path.size() - 3].first.template As()->data.key_count) { is_in_right_skew_path = true; } if (is_in_right_skew_path) { - InsertFixUpLookPartA(pos, parent_page_guard, new_page_guard, page_guard, new_page_id); - if (!is_fixing_up_recursive) ++siz; + InsertFixUpLoopPartA(pos, parent_page_guard, new_page_guard, page_guard, new_page_id); return; } parent_page_guard.template AsMut()->data.p_data[pos.path[pos.path.size() - 2].second].first = @@ -308,31 +304,255 @@ class BPlusTreeIndexer { ->data.p_data[new_page_guard.template As()->data.key_count - 1] .first, new_page_id, true); - if (!is_fixing_up_recursive) ++siz; return; } - void RemoveEntryAt(PositionSignType &pos) { - if (siz == 1) { - // special case for the last entry - bpm->DeletePage(root_page_id); - root_page_id = 0; - --siz; - return; - } + void RemoveEntryInRightSkewPath(PositionSignType &pos) { auto &page_guard = pos.path.back().first; + bool has_enough_keys = false; if (page_guard.template As()->data.key_count > _ActualDataType::kMinNumberOfKeysForLeaf || (page_guard.template As()->data.page_status & PageStatusType::ROOT) != 0) { // case 1: the page has enough keys + has_enough_keys = true; + } + if (pos.path.back().second == page_guard.template As()->data.key_count) { + // The "entry" to remove is just a past-the-end pointer + page_guard.template AsMut()->data.key_count--; + return; + } else { + // The "entry" to remove is a key-val pair memmove( page_guard.template AsMut()->data.p_data + pos.path.back().second, page_guard.template As()->data.p_data + pos.path.back().second + 1, (page_guard.template As()->data.key_count - pos.path.back().second - 1) * sizeof(key_index_pair_t)); page_guard.template AsMut()->data.key_count--; - --siz; return; } - throw std::runtime_error("Not implemented yet: RemoveEntryAt"); - // TODO + if (has_enough_keys) { + if (page_guard.template As()->data.page_status & PageStatusType::ROOT && + page_guard.template As()->data.key_count) { + // special case for the root page + root_page_id = page_guard.template As()->data.p_data[0].second; + page_id_t page_to_delete = page_guard.PageId(); + pos.path.clear(); // all page_guards are invalid now + bpm->DeletePage(page_to_delete); + return; + } + return; + } + assert(pos.path.size() >= 2); + assert(pos.path[pos.path.size() - 2].second > 0); + page_id_t possible_prev_page_id = 0; + auto &parent_page_guard = pos.path[pos.path.size() - 2].first; + possible_prev_page_id = + parent_page_guard.template As()->data.p_data[pos.path[pos.path.size() - 2].second - 1].second; + BasicPageGuard prev_page_guard = std::move(bpm->FetchPageBasic(possible_prev_page_id)); + if (prev_page_guard.As()->data.key_count > _ActualDataType::kMinNumberOfKeysForLeaf) { + // borrow from prev + // first, set the past-the-end pointer + page_guard.template AsMut() + ->data.p_data[page_guard.template As()->data.key_count + 1] + .second = + page_guard.template As()->data.p_data[page_guard.template As()->data.key_count].second; + memmove(page_guard.template AsMut()->data.p_data + 1, page_guard.template As()->data.p_data, + page_guard.template As()->data.key_count * sizeof(key_index_pair_t)); + page_guard.template AsMut()->data.p_data[0] = + prev_page_guard.template As() + ->data.p_data[prev_page_guard.template As()->data.key_count - 1]; + page_guard.template AsMut()->data.key_count++; + prev_page_guard.template AsMut()->data.key_count--; + parent_page_guard.template AsMut()->data.p_data[pos.path[pos.path.size() - 2].second - 1].first = + prev_page_guard.template As() + ->data.p_data[prev_page_guard.template As()->data.key_count - 1] + .first; + return; + } + // now we have no choice but to merge self into prev + memmove(prev_page_guard.template AsMut()->data.p_data + + prev_page_guard.template As()->data.key_count, + page_guard.template As()->data.p_data, + page_guard.template As()->data.key_count * sizeof(key_index_pair_t)); + prev_page_guard.template AsMut()->data.key_count += page_guard.template As()->data.key_count; + prev_page_guard.template AsMut() + ->data.p_data[prev_page_guard.template As()->data.key_count] + .second = + page_guard.template As()->data.p_data[page_guard.template As()->data.key_count].second; + parent_page_guard.template AsMut()->data.p_data[pos.path[pos.path.size() - 2].second - 1].first = + prev_page_guard.template As() + ->data.p_data[prev_page_guard.template As()->data.key_count - 1] + .first; + pos.path.pop_back(); // page_guard is no longer valid + RemoveEntryInRightSkewPath(pos); + return; + } + void RemoveEntryAt(PositionSignType &pos, bool is_fixing_up_recursive = false) { + if (siz == 1) { + // special case for the last entry + bpm->DeletePage(root_page_id); + root_page_id = 0; + return; + } + auto &page_guard = pos.path.back().first; + bool has_enough_keys = false; + if (page_guard.template As()->data.key_count > _ActualDataType::kMinNumberOfKeysForLeaf || + (page_guard.template As()->data.page_status & PageStatusType::ROOT) != 0) { + // case 1: the page has enough keys + has_enough_keys = true; + } + memmove( + page_guard.template AsMut()->data.p_data + pos.path.back().second, + page_guard.template As()->data.p_data + pos.path.back().second + 1, + (page_guard.template As()->data.key_count - pos.path.back().second - 1) * sizeof(key_index_pair_t)); + page_guard.template AsMut()->data.key_count--; + if (has_enough_keys) { + if (page_guard.template As()->data.page_status & PageStatusType::ROOT && + page_guard.template As()->data.key_count) { + // special case for the root page + root_page_id = page_guard.template As()->data.p_data[0].second; + page_id_t page_to_delete = page_guard.PageId(); + pos.path.clear(); // all page_guards are invalid now + bpm->DeletePage(page_to_delete); + return; + } + return; + } + assert(pos.path.size() >= 2); + // First, check if we can borrow from siblings. If we can, we just borrow from siblings. Otherwise, we just merge. + page_id_t possible_prev_page_id = 0, possible_next_page_id = 0; + auto &parent_page_guard = pos.path[pos.path.size() - 2].first; + bool is_in_right_skew_path = false; + // if (pos.path[pos.path.size() - 2].second == parent_page_guard.template As()->data.key_count) { + // is_in_right_skew_path = true; + // } + if (pos.path.size() == 2 || pos.path[pos.path.size() - 3].second == + pos.path[pos.path.size() - 3].first.template As()->data.key_count) { + is_in_right_skew_path = true; + } + if (is_in_right_skew_path) { + if (pos.path[pos.path.size() - 2].second < parent_page_guard.template As()->data.key_count) { + if (pos.path[pos.path.size() - 2].second + 1 < _ActualDataType::kMaxKeyCount) + possible_next_page_id = + parent_page_guard.template As()->data.p_data[pos.path[pos.path.size() - 2].second + 1].second; + else + possible_next_page_id = parent_page_guard.template As()->data.p_n; + } + } else { + if (pos.path[pos.path.size() - 2].second < parent_page_guard.template As()->data.key_count - 1) + possible_next_page_id = + parent_page_guard.template As()->data.p_data[pos.path[pos.path.size() - 2].second + 1].second; + } + if (pos.path[pos.path.size() - 2].second > 0) + possible_prev_page_id = + parent_page_guard.template As()->data.p_data[pos.path[pos.path.size() - 2].second - 1].second; + if (possible_prev_page_id != 0) { + BasicPageGuard prev_page_guard = std::move(bpm->FetchPageBasic(possible_prev_page_id)); + if (prev_page_guard.As()->data.key_count > _ActualDataType::kMinNumberOfKeysForLeaf) { + // borrow from prev + memmove(page_guard.template AsMut()->data.p_data + 1, page_guard.template As()->data.p_data, + page_guard.template As()->data.key_count * sizeof(key_index_pair_t)); + page_guard.template AsMut()->data.p_data[0] = + prev_page_guard.template As() + ->data.p_data[prev_page_guard.template As()->data.key_count - 1]; + page_guard.template AsMut()->data.key_count++; + prev_page_guard.template AsMut()->data.key_count--; + parent_page_guard.template AsMut()->data.p_data[pos.path[pos.path.size() - 2].second - 1].first = + prev_page_guard.template As() + ->data.p_data[prev_page_guard.template As()->data.key_count - 1] + .first; + return; + } + } + if (possible_next_page_id != 0) { + BasicPageGuard next_page_guard = std::move(bpm->FetchPageBasic(possible_next_page_id)); + if (next_page_guard.As()->data.key_count > _ActualDataType::kMinNumberOfKeysForLeaf) { + // borrow from next + page_guard.template AsMut()->data.p_data[page_guard.template As()->data.key_count] = + next_page_guard.template As()->data.p_data[0]; + page_guard.template AsMut()->data.key_count++; + next_page_guard.template AsMut()->data.key_count--; + parent_page_guard.template AsMut()->data.p_data[pos.path[pos.path.size() - 2].second].first = + page_guard.template As() + ->data.p_data[page_guard.template As()->data.key_count - 1] + .first; + memmove(next_page_guard.template AsMut()->data.p_data, + next_page_guard.template As()->data.p_data + 1, + next_page_guard.template As()->data.key_count * sizeof(key_index_pair_t)); + if (is_fixing_up_recursive && + pos.path[pos.path.size() - 2].second + 1 == parent_page_guard.template As()->data.key_count) { + if (next_page_guard.template As()->data.key_count == _ActualDataType::kMaxKeyCount - 1) { + // special process for meaningful p_n + next_page_guard.template AsMut() + ->data.p_data[next_page_guard.template As()->data.key_count] + .second = next_page_guard.template As()->data.p_n; + } else { + // special process for past-the-end p_i + next_page_guard.template AsMut() + ->data.p_data[next_page_guard.template As()->data.key_count] + .second = next_page_guard.template As() + ->data.p_data[next_page_guard.template As()->data.key_count + 1] + .second; + } + } + return; + } + } + if (possible_prev_page_id != 0) { + // merge self into prev + BasicPageGuard prev_page_guard = std::move(bpm->FetchPageBasic(possible_prev_page_id)); + prev_page_guard.template AsMut()->data.p_n = page_guard.template As()->data.p_n; + memmove(prev_page_guard.template AsMut()->data.p_data + + prev_page_guard.template As()->data.key_count, + page_guard.template As()->data.p_data, + page_guard.template As()->data.key_count * sizeof(key_index_pair_t)); + prev_page_guard.template AsMut()->data.key_count += page_guard.template As()->data.key_count; + parent_page_guard.template AsMut()->data.p_data[pos.path[pos.path.size() - 2].second - 1].first = + prev_page_guard.template As() + ->data.p_data[prev_page_guard.template As()->data.key_count - 1] + .first; + page_id_t current_page_id = page_guard.PageId(); + pos.path.pop_back(); // page_guard is no longer valid + if (!is_in_right_skew_path) { + bpm->DeletePage(current_page_id); + // we need to update the parent page + RemoveEntryAt(pos, true); + return; + } + RemoveEntryInRightSkewPath(pos); + return; + } + if (possible_next_page_id != 0) { + // merge self into next + assert(possible_prev_page_id == 0); + BasicPageGuard next_page_guard = std::move(bpm->FetchPageBasic(possible_next_page_id)); + if (is_fixing_up_recursive && + pos.path[pos.path.size() - 2].second + 1 == parent_page_guard.template As()->data.key_count) { + // the next page has past-the-end pointer + if (next_page_guard.template As()->data.key_count == _ActualDataType::kMaxKeyCount) { + next_page_guard.template AsMut()->data.p_n = + next_page_guard.template As()->data.p_data[_ActualDataType::kMaxKeyCount - 1].second; + } else { + next_page_guard.template AsMut()->data.p_data[_ActualDataType::kMaxKeyCount].second = + next_page_guard.template As()->data.p_data[_ActualDataType::kMaxKeyCount - 1].second; + } + } + memmove( + next_page_guard.template AsMut()->data.p_data + page_guard.template As()->data.key_count, + next_page_guard.template As()->data.p_data, + next_page_guard.template As()->data.key_count * sizeof(key_index_pair_t)); + memmove(next_page_guard.template AsMut()->data.p_data, page_guard.template As()->data.p_data, + page_guard.template As()->data.key_count * sizeof(key_index_pair_t)); + next_page_guard.template AsMut()->data.key_count += page_guard.template As()->data.key_count; + page_id_t current_page_id = page_guard.PageId(); + pos.path.pop_back(); // page_guard is no longer valid + if (!is_in_right_skew_path) { + bpm->DeletePage(current_page_id); + // we need to update the parent page + RemoveEntryAt(pos, true); + return; + } + RemoveEntryInRightSkewPath(pos); + return; + } + throw std::runtime_error("No sibling found!"); } public: @@ -493,6 +713,7 @@ class BPlusTreeIndexer { return false; } InsertEntryAt(pos, key, value); + ++siz; return true; } bool Remove(const KeyType &key) { // Finish Design @@ -504,6 +725,7 @@ class BPlusTreeIndexer { if (key_cmp(key, pos.path.back().first.template As()->data.p_data[pos.path.back().second].first)) return false; RemoveEntryAt(pos); + --siz; return true; } size_t Size() { return siz; } // Finish Design diff --git a/design.md b/design.md index e6db4c3..a78cc82 100644 --- a/design.md +++ b/design.md @@ -22,6 +22,7 @@ 基本参考: - p[i]子树中的所有key K都满足: k[i-1] \< K \<= k[i],且k[i]一定能取到,即直接无缝对接lower_bound - 对外接口提供类似于迭代器的东西,但该迭代器只支持向后单项移动、读取value值、修改value值,并且,迭代器会保留PageGuard,因此如果B+树在迭代器之前析构,会出现访问越界。 +- 由于子区间**左开右闭**,于是绝大多数Internal Page和Leaf Page一样,都没有尾后指针,整棵树的左下角会有一大片的leaf like pages,它们都有个共同特性,即指针数量和键值数量相同,但真正的leaf page还需要额外维护page状态标号和p_n指针。 # UI设计 - 语言:Python diff --git a/test/bpt_basic_test.cpp b/test/bpt_basic_test.cpp index 701765d..37a5373 100644 --- a/test/bpt_basic_test.cpp +++ b/test/bpt_basic_test.cpp @@ -21,6 +21,12 @@ class FixLengthString { } return false; } + bool operator==(const FixLengthString &that) const { + for (size_t i = 0; i < length; i++) { + if (data[i] != that.data[i]) return false; + } + return true; + } }; } // namespace bpt_basic_test TEST(BasicTest, Compile) { // This Test only test the compile of the code @@ -217,7 +223,7 @@ TEST(BasicTest, Split_in_Put_Simple_3) { const int str_len = 16; typedef bpt_basic_test::FixLengthString KeyType; // fprintf(stderr, "sizeof(std::pair)=%lu\n", - // sizeof(std::pair)); + // sizeof(std::pair)); remove("/tmp/bpt5.db"); DiskManager *dm = new DiskManager("/tmp/bpt5.db"); BufferPoolManager *bpm = new BufferPoolManager(20, 3, dm); @@ -261,7 +267,7 @@ TEST(HarderTest, Split_in_Put_Harder_1) { const int str_len = 1360 - 4; typedef bpt_basic_test::FixLengthString KeyType; // fprintf(stderr, "sizeof(std::pair)=%lu\n", - // sizeof(std::pair)); + // sizeof(std::pair)); remove("/tmp/bpt6.db"); DiskManager *dm = new DiskManager("/tmp/bpt6.db"); BufferPoolManager *bpm = new BufferPoolManager(20, 3, dm); @@ -280,10 +286,13 @@ TEST(HarderTest, Split_in_Put_Harder_1) { for (int i = 1; i <= ops; i++) { bpt.Put(keys[i - 1], i + 3); ASSERT_EQ(bpt.Get(keys[i - 1]), i + 3); + ASSERT_EQ(bpt.Size(), i); } + ASSERT_EQ(bpt.Size(), ops); for (int i = 1; i <= ops; i++) { ASSERT_EQ(bpt.Get(keys[i - 1]), i + 3); } + ASSERT_EQ(bpt.Size(), ops); } delete bpm; delete dm; @@ -291,9 +300,11 @@ TEST(HarderTest, Split_in_Put_Harder_1) { bpm = new BufferPoolManager(20, 3, dm); { BPlusTreeIndexer> bpt(bpm); + ASSERT_EQ(bpt.Size(), ops); for (int i = 1; i <= ops; i++) { ASSERT_EQ(bpt.Get(keys[i - 1]), i + 3); } + ASSERT_EQ(bpt.Size(), ops); } delete bpm; delete dm; @@ -305,7 +316,7 @@ TEST(HarderTest, Split_in_Put_Harder_2) { const int str_len = 2030; typedef bpt_basic_test::FixLengthString KeyType; // fprintf(stderr, "sizeof(std::pair)=%lu\n", - // sizeof(std::pair)); + // sizeof(std::pair)); remove("/tmp/bpt7.db"); DiskManager *dm = new DiskManager("/tmp/bpt7.db"); BufferPoolManager *bpm = new BufferPoolManager(20, 3, dm); @@ -324,10 +335,13 @@ TEST(HarderTest, Split_in_Put_Harder_2) { for (int i = 1; i <= ops; i++) { bpt.Put(keys[i - 1], i + 3); ASSERT_EQ(bpt.Get(keys[i - 1]), i + 3); + ASSERT_EQ(bpt.Size(), i); } + ASSERT_EQ(bpt.Size(), ops); for (int i = 1; i <= ops; i++) { ASSERT_EQ(bpt.Get(keys[i - 1]), i + 3); } + ASSERT_EQ(bpt.Size(), ops); } delete bpm; delete dm; @@ -335,9 +349,11 @@ TEST(HarderTest, Split_in_Put_Harder_2) { bpm = new BufferPoolManager(20, 3, dm); { BPlusTreeIndexer> bpt(bpm); + ASSERT_EQ(bpt.Size(), ops); for (int i = 1; i <= ops; i++) { ASSERT_EQ(bpt.Get(keys[i - 1]), i + 3); } + ASSERT_EQ(bpt.Size(), ops); } delete bpm; delete dm; @@ -349,7 +365,7 @@ TEST(HarderTest, Split_in_Put_Harder_3) { const int str_len = 800; typedef bpt_basic_test::FixLengthString KeyType; // fprintf(stderr, "sizeof(std::pair)=%lu\n", - // sizeof(std::pair)); + // sizeof(std::pair)); const std::string db_file_name = "/tmp/bpt8.db"; std::vector keys; const int ops = 1000; @@ -370,10 +386,13 @@ TEST(HarderTest, Split_in_Put_Harder_3) { for (int i = 1; i <= ops; i++) { bpt.Put(keys[i - 1], i + 3); ASSERT_EQ(bpt.Get(keys[i - 1]), i + 3); + ASSERT_EQ(bpt.Size(), i); } + ASSERT_EQ(bpt.Size(), ops); for (int i = 1; i <= ops; i++) { ASSERT_EQ(bpt.Get(keys[i - 1]), i + 3); } + ASSERT_EQ(bpt.Size(), ops); } delete bpm; delete dm; @@ -381,9 +400,11 @@ TEST(HarderTest, Split_in_Put_Harder_3) { bpm = new BufferPoolManager(20, 3, dm); { BPlusTreeIndexer> bpt(bpm); + ASSERT_EQ(bpt.Size(), ops); for (int i = 1; i <= ops; i++) { ASSERT_EQ(bpt.Get(keys[i - 1]), i + 3); } + ASSERT_EQ(bpt.Size(), ops); } delete bpm; delete dm; @@ -395,7 +416,7 @@ TEST(HarderTest, Split_in_Put_Harder_4) { const int str_len = 800; typedef bpt_basic_test::FixLengthString KeyType; // fprintf(stderr, "sizeof(std::pair)=%lu\n", - // sizeof(std::pair)); + // sizeof(std::pair)); const std::string db_file_name = "/tmp/bpt9.db"; std::vector keys; const int ops = 1000; @@ -416,10 +437,13 @@ TEST(HarderTest, Split_in_Put_Harder_4) { for (int i = 1; i <= ops; i++) { bpt.Put(keys[i - 1], i + 3); ASSERT_EQ(bpt.Get(keys[i - 1]), i + 3); + ASSERT_EQ(bpt.Size(), i); } + ASSERT_EQ(bpt.Size(), ops); for (int i = 1; i <= ops; i++) { ASSERT_EQ(bpt.Get(keys[i - 1]), i + 3); } + ASSERT_EQ(bpt.Size(), ops); } delete bpm; delete dm; @@ -427,9 +451,11 @@ TEST(HarderTest, Split_in_Put_Harder_4) { bpm = new BufferPoolManager(20, 3, dm); { BPlusTreeIndexer> bpt(bpm); + ASSERT_EQ(bpt.Size(), ops); for (int i = 1; i <= ops; i++) { ASSERT_EQ(bpt.Get(keys[i - 1]), i + 3); } + ASSERT_EQ(bpt.Size(), ops); } delete bpm; delete dm; @@ -441,7 +467,7 @@ TEST(HarderTest, Split_in_Put_Harder_5) { const int str_len = 800; typedef bpt_basic_test::FixLengthString KeyType; // fprintf(stderr, "sizeof(std::pair)=%lu\n", - // sizeof(std::pair)); + // sizeof(std::pair)); const std::string db_file_name = "/tmp/bpt10.db"; std::vector keys; const int ops = 15 + rnd() % 20; @@ -462,10 +488,13 @@ TEST(HarderTest, Split_in_Put_Harder_5) { for (int i = 1; i <= ops; i++) { bpt.Put(keys[i - 1], i + 3); ASSERT_EQ(bpt.Get(keys[i - 1]), i + 3); + ASSERT_EQ(bpt.Size(), i); } + ASSERT_EQ(bpt.Size(), ops); for (int i = 1; i <= ops; i++) { ASSERT_EQ(bpt.Get(keys[i - 1]), i + 3); } + ASSERT_EQ(bpt.Size(), ops); } delete bpm; delete dm; @@ -473,9 +502,11 @@ TEST(HarderTest, Split_in_Put_Harder_5) { bpm = new BufferPoolManager(20, 3, dm); { BPlusTreeIndexer> bpt(bpm); + ASSERT_EQ(bpt.Size(), ops); for (int i = 1; i <= ops; i++) { ASSERT_EQ(bpt.Get(keys[i - 1]), i + 3); } + ASSERT_EQ(bpt.Size(), ops); } delete bpm; delete dm; @@ -487,7 +518,7 @@ TEST(HarderTest, Split_in_Put_Harder_6) { const int str_len = 1000; typedef bpt_basic_test::FixLengthString KeyType; // fprintf(stderr, "sizeof(std::pair)=%lu\n", - // sizeof(std::pair)); + // sizeof(std::pair)); const std::string db_file_name = "/tmp/bpt11.db"; std::vector keys; const int ops = 15 + rnd() % 20; @@ -508,10 +539,13 @@ TEST(HarderTest, Split_in_Put_Harder_6) { for (int i = 1; i <= ops; i++) { bpt.Put(keys[i - 1], i + 3); ASSERT_EQ(bpt.Get(keys[i - 1]), i + 3); + ASSERT_EQ(bpt.Size(), i); } + ASSERT_EQ(bpt.Size(), ops); for (int i = 1; i <= ops; i++) { ASSERT_EQ(bpt.Get(keys[i - 1]), i + 3); } + ASSERT_EQ(bpt.Size(), ops); } delete bpm; delete dm; @@ -519,9 +553,11 @@ TEST(HarderTest, Split_in_Put_Harder_6) { bpm = new BufferPoolManager(20, 3, dm); { BPlusTreeIndexer> bpt(bpm); + ASSERT_EQ(bpt.Size(), ops); for (int i = 1; i <= ops; i++) { ASSERT_EQ(bpt.Get(keys[i - 1]), i + 3); } + ASSERT_EQ(bpt.Size(), ops); } delete bpm; delete dm; @@ -533,7 +569,7 @@ TEST(HarderTest, Split_in_Put_Harder_7) { const int str_len = 2000; typedef bpt_basic_test::FixLengthString KeyType; // fprintf(stderr, "sizeof(std::pair)=%lu\n", - // sizeof(std::pair)); + // sizeof(std::pair)); const std::string db_file_name = "/tmp/bpt12.db"; std::vector keys; const int ops = 15 + rnd() % 20; @@ -554,10 +590,13 @@ TEST(HarderTest, Split_in_Put_Harder_7) { for (int i = 1; i <= ops; i++) { bpt.Put(keys[i - 1], i + 3); ASSERT_EQ(bpt.Get(keys[i - 1]), i + 3); + ASSERT_EQ(bpt.Size(), i); } + ASSERT_EQ(bpt.Size(), ops); for (int i = 1; i <= ops; i++) { ASSERT_EQ(bpt.Get(keys[i - 1]), i + 3); } + ASSERT_EQ(bpt.Size(), ops); } delete bpm; delete dm; @@ -565,9 +604,11 @@ TEST(HarderTest, Split_in_Put_Harder_7) { bpm = new BufferPoolManager(20, 3, dm); { BPlusTreeIndexer> bpt(bpm); + ASSERT_EQ(bpt.Size(), ops); for (int i = 1; i <= ops; i++) { ASSERT_EQ(bpt.Get(keys[i - 1]), i + 3); } + ASSERT_EQ(bpt.Size(), ops); } delete bpm; delete dm; @@ -579,7 +620,7 @@ TEST(HarderTest, Split_in_Put_Harder_8) { const int str_len = 1300; typedef bpt_basic_test::FixLengthString KeyType; // fprintf(stderr, "sizeof(std::pair)=%lu\n", - // sizeof(std::pair)); + // sizeof(std::pair)); const std::string db_file_name = "/tmp/bpt13.db"; std::vector keys; const int ops = 15 + rnd() % 20; @@ -600,10 +641,13 @@ TEST(HarderTest, Split_in_Put_Harder_8) { for (int i = 1; i <= ops; i++) { bpt.Put(keys[i - 1], i + 3); ASSERT_EQ(bpt.Get(keys[i - 1]), i + 3); + ASSERT_EQ(bpt.Size(), i); } + ASSERT_EQ(bpt.Size(), ops); for (int i = 1; i <= ops; i++) { ASSERT_EQ(bpt.Get(keys[i - 1]), i + 3); } + ASSERT_EQ(bpt.Size(), ops); } delete bpm; delete dm; @@ -611,9 +655,11 @@ TEST(HarderTest, Split_in_Put_Harder_8) { bpm = new BufferPoolManager(20, 3, dm); { BPlusTreeIndexer> bpt(bpm); + ASSERT_EQ(bpt.Size(), ops); for (int i = 1; i <= ops; i++) { ASSERT_EQ(bpt.Get(keys[i - 1]), i + 3); } + ASSERT_EQ(bpt.Size(), ops); } delete bpm; delete dm; @@ -623,7 +669,7 @@ TEST(HarderTest, Split_in_Put_Harder_9) { std::vector> entries; const int kNumberOfKeys = 100000; const unsigned int RndSeed = testing::GTEST_FLAG(random_seed); - std::mt19937 rnd(RndSeed); + std::mt19937 rnd(1); for (int i = 0; i < kNumberOfKeys; i++) { entries.push_back({i + 3, rnd()}); } @@ -636,11 +682,14 @@ TEST(HarderTest, Split_in_Put_Harder_9) { for (int i = 0; i < kNumberOfKeys; i++) { bpt.Put(entries[i].first, entries[i].second); ASSERT_EQ(bpt.Get(entries[i].first), entries[i].second); + ASSERT_EQ(bpt.Size(), i + 1); } + ASSERT_EQ(bpt.Size(), kNumberOfKeys); std::shuffle(entries.begin(), entries.end(), rnd); for (int i = 0; i < kNumberOfKeys; i++) { ASSERT_EQ(bpt.Get(entries[i].first), entries[i].second); } + ASSERT_EQ(bpt.Size(), kNumberOfKeys); } delete bpm; delete dm; @@ -649,9 +698,78 @@ TEST(HarderTest, Split_in_Put_Harder_9) { std::shuffle(entries.begin(), entries.end(), rnd); { BPlusTreeIndexer> bpt(bpm); + ASSERT_EQ(bpt.Size(), kNumberOfKeys); for (int i = 0; i < kNumberOfKeys; i++) { ASSERT_EQ(bpt.Get(entries[i].first), entries[i].second); } + ASSERT_EQ(bpt.Size(), kNumberOfKeys); + } + delete bpm; + delete dm; + dm = new DiskManager("/tmp/bpt14.db"); + bpm = new BufferPoolManager(20, 3, dm); + { + BPlusTreeIndexer> bpt(bpm); + ASSERT_EQ(bpt.Size(), kNumberOfKeys); + sort(entries.begin(), entries.end()); + auto it_std = entries.begin(); + auto it_bpt = bpt.lower_bound_const(entries[0].first); + for (int i = 0; i < kNumberOfKeys; i++) { + ASSERT_EQ(it_bpt.GetKey(), it_std->first); + ASSERT_EQ(it_bpt.GetValue(), it_std->second); + ++it_bpt; + it_std++; + } + ASSERT_TRUE(it_bpt == bpt.end_const()); + ASSERT_EQ(bpt.Size(), kNumberOfKeys); + } + delete bpm; + delete dm; +} + +TEST(RemoveTest, RM_1) { + const unsigned int RndSeed = testing::GTEST_FLAG(random_seed); + std::mt19937 rnd(RndSeed); + const int str_len = 800; + typedef bpt_basic_test::FixLengthString KeyType; + // fprintf(stderr, "sizeof(std::pair)=%lu\n", + // sizeof(std::pair)); + const std::string db_file_name = "/tmp/bpt15.db"; + remove(db_file_name.c_str()); + std::vector keys; + const int max_keys = 10; + for (int i = 1; i <= max_keys; i++) { + KeyType key; + for (size_t j = 0; j < str_len; j++) key.data[j] = 'a' + rnd() % 26; + key.data[6] = '\0'; + keys.push_back(key); + } + std::sort(keys.begin(), keys.end()); + DiskManager *dm = new DiskManager(db_file_name.c_str()); + BufferPoolManager *bpm = new BufferPoolManager(20, 3, dm); + { + BPlusTreeIndexer> bpt(bpm); + bpt.Put(keys[1], 4); + bpt.Put(keys[0], 3); + bpt.Put(keys[2], 5); + bpt.Put(keys[3], 6); + bpt.Put(keys[4], 7); + bpt.Remove(keys[1]); + ASSERT_EQ(bpt.Size(), 4); + auto it = bpt.lower_bound_const(keys[0]); + ASSERT_EQ(it.GetKey(), keys[0]); + ASSERT_EQ(it.GetValue(), 3); + ++it; + ASSERT_EQ(it.GetKey(), keys[2]); + ASSERT_EQ(it.GetValue(), 5); + ++it; + ASSERT_EQ(it.GetKey(), keys[3]); + ASSERT_EQ(it.GetValue(), 6); + ++it; + ASSERT_EQ(it.GetKey(), keys[4]); + ASSERT_EQ(it.GetValue(), 7); + ++it; + ASSERT_TRUE(it == bpt.end_const()); } delete bpm; delete dm; diff --git a/test/t1_std.cpp b/test/t1_std.cpp new file mode 100644 index 0000000..aa14d9e --- /dev/null +++ b/test/t1_std.cpp @@ -0,0 +1,88 @@ +// 此程序仅用于对拍 +#include +#include +#include +#include +#include +#include +#include +#include "bpt/disk_manager.h" +typedef uint64_t hash_t; +inline hash_t Hash(std::string str) noexcept { + constexpr static char salt1[10] = "mL;]-=eT"; + constexpr static char salt2[10] = "9B(str.c_str() + i); + ret ^= *reinterpret_cast(inner_salt + (i & 15)); + ret += 0x9e3779b97f4a7c15; + ret = (ret ^ (ret >> 30)) * 0xbf58476d1ce4e5b9; + ret = (ret ^ (ret >> 27)) * 0x94d049bb133111eb; + ret ^= ret >> 31; + } + for (; i < str.length(); ++i) { + ret ^= str[i]; + ret ^= inner_salt[i & 15]; + ret += 0x9e3779b97f4a7c15; + ret = (ret ^ (ret >> 30)) * 0xbf58476d1ce4e5b9; + ret = (ret ^ (ret >> 27)) * 0x94d049bb133111eb; + ret ^= ret >> 31; + } + return ret; +} +std::unordered_map> mp; +int main() { + std::fstream f("data.txt"); + hash_t key; + int value; + while (f >> key >> value) { + mp[key].insert(value); + } + int n; + std::cin >> n; + std::string op; + while (n-- > 0) { + std::cin >> op; + if (op == "insert") { + std::string key; + int value; + std::cin >> key >> value; + mp[Hash(key)].insert(value); + } else if (op == "delete") { + std::string key; + int value; + std::cin >> key >> value; + hash_t hsh = Hash(key); + mp[hsh].erase(value); + if (mp[hsh].empty()) mp.erase(hsh); + } else if (op == "find") { + std::string key; + int value; + std::cin >> key; + hash_t hsh = Hash(key); + if (mp.find(hsh) == mp.end()) { + std::cout << "null"; + } else { + for (auto &x : mp[hsh]) { + std::cout << x << ' '; + } + } + std::cout << '\n'; + } else { + std::cout << "Unknown operation\n"; + } + } + f.close(); + remove("data.txt"); + f.open("data.txt", std::ios::out); + for (auto &x : mp) { + for (auto &y : x.second) { + f << x.first << ' ' << y << '\n'; + } + } + return 0; +} \ No newline at end of file