From 617b366d90a49218119d2521e426d7a8e285dd43 Mon Sep 17 00:00:00 2001 From: happyZYM Date: Sun, 28 Apr 2024 02:53:53 +0000 Subject: [PATCH] ready to write full split in put --- bpt/include/bpt/bpt.hpp | 80 ++++++++++++++-- test/CMakeLists.txt | 2 +- test/bpt_basic_test.cpp | 206 ++++++++++++++++++++++++++++++++++++++++ 3 files changed, 279 insertions(+), 9 deletions(-) diff --git a/bpt/include/bpt/bpt.hpp b/bpt/include/bpt/bpt.hpp index d49ca27..672c921 100644 --- a/bpt/include/bpt/bpt.hpp +++ b/bpt/include/bpt/bpt.hpp @@ -1,6 +1,8 @@ #ifndef BPT_HPP #define BPT_HPP #include +#include +#include #include #include #include @@ -18,7 +20,7 @@ class BPlusTreeIndexer { typedef BPlusTreePage PageType; typedef ActualDataType _ActualDataType; typedef std::pair key_index_pair_t; - typedef std::pair value_type; + // typedef std::pair value_type; private: struct PositionSignType { @@ -44,7 +46,7 @@ class BPlusTreeIndexer { while ((res.path.back().first.template As()->data.page_status & PageStatusType::LEAF) == 0) { default_numeric_index_t nxt_page_id; in_page_key_count_t internal_id = res.path.back().second; - if (internal_id < res.path.back().first.template As()->data.key_count) + if (internal_id < _ActualDataType::kMaxKeyCount) nxt_page_id = res.path.back().first.template As()->data.p_data[internal_id].second; else nxt_page_id = res.path.back().first.template As()->data.p_n; @@ -56,10 +58,14 @@ class BPlusTreeIndexer { next_page_guard.As()->data.p_data; res.path.push_back(std::make_pair(std::move(next_page_guard), nxt)); } - if (nxt == res.path.back().first.template As()->data.key_count) res.is_end = true; + if (nxt == res.path.back().first.template As()->data.key_count) + res.is_end = true; + else + res.is_end = false; return res; } void InsertEntryAt(PositionSignType &pos, const KeyType &key, b_plus_tree_value_index_t value) { + fprintf(stderr, "_ActualDataType::kMaxKeyCount = %d\n", (int)_ActualDataType::kMaxKeyCount); if (siz == 0) { // special case for the first entry 
BasicPageGuard new_page_guard = bpm->NewPageGuarded(&root_page_id); @@ -75,14 +81,71 @@ class BPlusTreeIndexer { // case 1: the page has enough space memmove(page_guard.template AsMut()->data.p_data + pos.path.back().second + 1, page_guard.template As()->data.p_data + pos.path.back().second, - (page_guard.template As()->data.key_count - pos.path.back().second) * sizeof(value_type)); + (page_guard.template As()->data.key_count - pos.path.back().second) * sizeof(key_index_pair_t)); page_guard.template AsMut()->data.p_data[pos.path.back().second] = std::make_pair(key, value); page_guard.template AsMut()->data.key_count++; + fprintf(stderr, "page_guard.template As()->data.key_count = %d\n", + (int)page_guard.template As()->data.key_count); ++siz; return; } + // In our case, the tree is not too high, so we do not consider borrowing from siblings, we just split the page. + // We first construct a new page, and then move half of the keys to the new page. + // Then check if we split the root page; if so, we just handle it. 
+ // Otherwise, what we need to do is modify the parent page, then "insert" a new key to the parent page + page_id_t new_page_id; + BasicPageGuard new_page_guard = bpm->NewPageGuarded(&new_page_id); + // Then move the last kMinNumberOfKeysForLeaf keys(including newly inserted) to the new page + new_page_guard.AsMut()->data.page_status = PageStatusType::LEAF; + new_page_guard.AsMut()->data.key_count = _ActualDataType::kMinNumberOfKeysForLeaf; + page_guard.template AsMut()->data.key_count -= _ActualDataType::kMinNumberOfKeysForLeaf; + new_page_guard.AsMut()->data.p_n = page_guard.template As()->data.p_n; + page_guard.template AsMut()->data.p_n = new_page_id; + if (pos.path.back().second <= _ActualDataType::kMaxKeyCount - _ActualDataType::kMinNumberOfKeysForLeaf) { + // the new key is in the first half + memmove(new_page_guard.template AsMut()->data.p_data, + page_guard.template As()->data.p_data + _ActualDataType::kMaxKeyCount - + _ActualDataType::kMinNumberOfKeysForLeaf, + _ActualDataType::kMinNumberOfKeysForLeaf * sizeof(key_index_pair_t)); + memmove(page_guard.template AsMut()->data.p_data + pos.path.back().second + 1, + page_guard.template As()->data.p_data + pos.path.back().second, + (page_guard.template As()->data.key_count - pos.path.back().second) * sizeof(key_index_pair_t)); + page_guard.template AsMut()->data.p_data[pos.path.back().second] = std::make_pair(key, value); + page_guard.template AsMut()->data.key_count++; + } else { + // the new key is in the second half + memmove( + new_page_guard.template AsMut()->data.p_data, + page_guard.template As()->data.p_data + _ActualDataType::kMaxKeyCount - + _ActualDataType::kMinNumberOfKeysForLeaf + 1, + (pos.path.back().second - (_ActualDataType::kMaxKeyCount - _ActualDataType::kMinNumberOfKeysForLeaf + 1)) * + sizeof(key_index_pair_t)); + new_page_guard.template AsMut() + ->data.p_data[pos.path.back().second - + (_ActualDataType::kMaxKeyCount - _ActualDataType::kMinNumberOfKeysForLeaf + 1)] = + std::make_pair(key, 
value); + memmove(new_page_guard.template AsMut()->data.p_data + pos.path.back().second - + (_ActualDataType::kMaxKeyCount - _ActualDataType::kMinNumberOfKeysForLeaf + 1) + 1, + page_guard.template As()->data.p_data + pos.path.back().second, + (_ActualDataType::kMaxKeyCount - pos.path.back().second) * sizeof(key_index_pair_t)); + page_guard.template AsMut()->data.key_count++; + } + if (page_guard.template As()->data.page_status & PageStatusType::ROOT) { + // special case for the root page + page_guard.template AsMut()->data.page_status &= ~PageStatusType::ROOT; + BasicPageGuard new_root_page_guard = bpm->NewPageGuarded(&root_page_id); + new_root_page_guard.AsMut()->data.page_status = PageStatusType::ROOT; + new_root_page_guard.AsMut()->data.key_count = 1; + new_root_page_guard.AsMut()->data.p_data[0] = std::make_pair( + page_guard.template As()->data.p_data[page_guard.template As()->data.key_count - 1].first, + page_guard.PageId()); + new_root_page_guard.AsMut()->data.p_data[1] = std::make_pair(KeyType(), new_page_id); + ++siz; + fprintf(stderr, "new_page_guard.AsMut()->data.key_count = %d\n", + (int)new_page_guard.AsMut()->data.key_count); + return; + } throw std::runtime_error("Not implemented yet: InsertEntryAt"); - // TODO } void RemoveEntryAt(PositionSignType &pos) { if (siz == 1) { @@ -96,9 +159,10 @@ class BPlusTreeIndexer { if (page_guard.template As()->data.key_count > _ActualDataType::kMinNumberOfKeysForLeaf || (page_guard.template As()->data.page_status & PageStatusType::ROOT) != 0) { // case 1: the page has enough keys - memmove(page_guard.template AsMut()->data.p_data + pos.path.back().second, - page_guard.template As()->data.p_data + pos.path.back().second + 1, - (page_guard.template As()->data.key_count - pos.path.back().second - 1) * sizeof(value_type)); + memmove( + page_guard.template AsMut()->data.p_data + pos.path.back().second, + page_guard.template As()->data.p_data + pos.path.back().second + 1, + (page_guard.template As()->data.key_count - 
pos.path.back().second - 1) * sizeof(key_index_pair_t)); page_guard.template AsMut()->data.key_count--; --siz; return; diff --git a/test/CMakeLists.txt b/test/CMakeLists.txt index d2008c9..c84657f 100644 --- a/test/CMakeLists.txt +++ b/test/CMakeLists.txt @@ -10,6 +10,6 @@ target_link_libraries(buffer_pool_manager_test bpt GTest::gtest_main spdlog::spd add_executable(page_guard_test page_guard_test.cpp) target_link_libraries(page_guard_test bpt GTest::gtest_main) add_executable(bpt_basic_test bpt_basic_test.cpp) -target_link_libraries(bpt_basic_test bpt GTest::gtest_main) +target_link_libraries(bpt_basic_test bpt GTest::gtest_main spdlog::spdlog) add_executable(buffer_pool_manager_extreme_test buffer_pool_manager_extreme_test.cpp) target_link_libraries(buffer_pool_manager_extreme_test bpt) \ No newline at end of file diff --git a/test/bpt_basic_test.cpp b/test/bpt_basic_test.cpp index edbea93..034f3e5 100644 --- a/test/bpt_basic_test.cpp +++ b/test/bpt_basic_test.cpp @@ -1,5 +1,10 @@ #include +#include +#include +#include +#include #include +#include #include "bpt/bpt.hpp" #include "bpt/buffer_pool_manager.h" #include "bpt/config.h" @@ -9,6 +14,13 @@ template class FixLengthString { public: char data[length]; + bool operator<(const FixLengthString &that) const { + for (size_t i = 0; i < length; i++) { + if (data[i] < that.data[i]) return true; + if (data[i] > that.data[i]) return false; + } + return false; + } }; } // namespace bpt_basic_test TEST(BasicTest, Compile) { // This Test only test the compile of the code @@ -135,4 +147,198 @@ TEST(BasicTest, Put_Get_Remove) { } delete bpm; delete dm; +} + +TEST(BasicTest, Split_in_Put_Simple_1) { + remove("/tmp/bpt4.db"); + DiskManager *dm = new DiskManager("/tmp/bpt4.db"); + BufferPoolManager *bpm = new BufferPoolManager(20, 3, dm); + { + BPlusTreeIndexer> bpt(bpm); + for (int i = 1; i <= 383; i++) { + bpt.Put(i, i + 3); + ASSERT_EQ(bpt.Get(i), i + 3); + } + for (int i = 1; i <= 383; i++) { + ASSERT_EQ(bpt.Get(i), i + 
3); + } + } + delete bpm; + delete dm; + dm = new DiskManager("/tmp/bpt4.db"); + bpm = new BufferPoolManager(20, 3, dm); + { + BPlusTreeIndexer> bpt(bpm); + for (int i = 1; i <= 383; i++) { + ASSERT_EQ(bpt.Get(i), i + 3); + } + } + delete bpm; + delete dm; +} + +TEST(BasicTest, Split_in_Put_Simple_2) { + std::vector keys; + const int kNumberOfKeys = 383; + for (int i = 1; i <= kNumberOfKeys; i++) keys.push_back(i); + const unsigned int RndSeed = testing::GTEST_FLAG(random_seed); + std::mt19937 rnd(RndSeed); + std::shuffle(keys.begin(), keys.end(), rnd); + remove("/tmp/bpt5.db"); + DiskManager *dm = new DiskManager("/tmp/bpt5.db"); + BufferPoolManager *bpm = new BufferPoolManager(20, 3, dm); + { + BPlusTreeIndexer> bpt(bpm); + for (int i = 1; i <= kNumberOfKeys; i++) { + bpt.Put(keys[i - 1], i + 3); + ASSERT_EQ(bpt.Get(keys[i - 1]), i + 3); + } + for (int i = 1; i <= kNumberOfKeys; i++) { + ASSERT_EQ(bpt.Get(keys[i - 1]), i + 3); + } + } + delete bpm; + delete dm; + dm = new DiskManager("/tmp/bpt5.db"); + bpm = new BufferPoolManager(20, 3, dm); + { + BPlusTreeIndexer> bpt(bpm); + for (int i = 1; i <= kNumberOfKeys; i++) { + ASSERT_EQ(bpt.Get(keys[i - 1]), i + 3); + } + } + delete bpm; + delete dm; +} + +TEST(BasicTest, Split_in_Put_Simple_3) { + const unsigned int RndSeed = testing::GTEST_FLAG(random_seed); + std::mt19937 rnd(RndSeed); + const int str_len = 16; + typedef bpt_basic_test::FixLengthString KeyType; + fprintf(stderr, "sizeof(std::pair)=%lu\n", + sizeof(std::pair)); + remove("/tmp/bpt5.db"); + DiskManager *dm = new DiskManager("/tmp/bpt5.db"); + BufferPoolManager *bpm = new BufferPoolManager(20, 3, dm); + std::vector keys; + const int ops = 307; + for (int i = 1; i <= ops; i++) { + KeyType key; + for (size_t j = 0; j < str_len; j++) key.data[j] = 'a' + rnd() % 26; + key.data[15] = '\0'; + keys.push_back(key); + } + // sort(keys.begin(), keys.end()); + std::shuffle(keys.begin(), keys.end(), rnd); + { + BPlusTreeIndexer> bpt(bpm); + for (int i = 1; i <= 
ops; i++) { + bpt.Put(keys[i - 1], i + 3); + ASSERT_EQ(bpt.Get(keys[i - 1]), i + 3); + } + for (int i = 1; i <= ops; i++) { + ASSERT_EQ(bpt.Get(keys[i - 1]), i + 3); + } + } + delete bpm; + delete dm; + dm = new DiskManager("/tmp/bpt5.db"); + bpm = new BufferPoolManager(20, 3, dm); + { + BPlusTreeIndexer> bpt(bpm); + for (int i = 1; i <= ops; i++) { + ASSERT_EQ(bpt.Get(keys[i - 1]), i + 3); + } + } + delete bpm; + delete dm; +} + +TEST(HarderTest, Split_in_Put_Harder_1) { + const unsigned int RndSeed = testing::GTEST_FLAG(random_seed); + std::mt19937 rnd(RndSeed); + const int str_len = 1360 - 4; + typedef bpt_basic_test::FixLengthString KeyType; + fprintf(stderr, "sizeof(std::pair)=%lu\n", + sizeof(std::pair)); + remove("/tmp/bpt6.db"); + DiskManager *dm = new DiskManager("/tmp/bpt6.db"); + BufferPoolManager *bpm = new BufferPoolManager(20, 3, dm); + std::vector keys; + const int ops = 5; + for (int i = 1; i <= ops; i++) { + KeyType key; + for (size_t j = 0; j < str_len; j++) key.data[j] = 'a' + rnd() % 26; + key.data[15] = '\0'; + keys.push_back(key); + } + // sort(keys.begin(), keys.end()); + std::shuffle(keys.begin(), keys.end(), rnd); + { + BPlusTreeIndexer> bpt(bpm); + for (int i = 1; i <= ops; i++) { + bpt.Put(keys[i - 1], i + 3); + ASSERT_EQ(bpt.Get(keys[i - 1]), i + 3); + } + for (int i = 1; i <= ops; i++) { + ASSERT_EQ(bpt.Get(keys[i - 1]), i + 3); + } + } + delete bpm; + delete dm; + dm = new DiskManager("/tmp/bpt6.db"); + bpm = new BufferPoolManager(20, 3, dm); + { + BPlusTreeIndexer> bpt(bpm); + for (int i = 1; i <= ops; i++) { + ASSERT_EQ(bpt.Get(keys[i - 1]), i + 3); + } + } + delete bpm; + delete dm; +} + +TEST(HarderTest, Split_in_Put_Harder_2) { + const unsigned int RndSeed = testing::GTEST_FLAG(random_seed); + std::mt19937 rnd(RndSeed); + const int str_len = 2030; + typedef bpt_basic_test::FixLengthString KeyType; + fprintf(stderr, "sizeof(std::pair)=%lu\n", + sizeof(std::pair)); + remove("/tmp/bpt7.db"); + DiskManager *dm = new 
DiskManager("/tmp/bpt7.db"); + BufferPoolManager *bpm = new BufferPoolManager(20, 3, dm); + std::vector keys; + const int ops = 4; + for (int i = 1; i <= ops; i++) { + KeyType key; + for (size_t j = 0; j < str_len; j++) key.data[j] = 'a' + rnd() % 26; + key.data[15] = '\0'; + keys.push_back(key); + } + sort(keys.begin(), keys.end()); + // std::shuffle(keys.begin(), keys.end(), rnd); + { + BPlusTreeIndexer> bpt(bpm); + for (int i = 1; i <= ops; i++) { + bpt.Put(keys[i - 1], i + 3); + ASSERT_EQ(bpt.Get(keys[i - 1]), i + 3); + } + for (int i = 1; i <= ops; i++) { + ASSERT_EQ(bpt.Get(keys[i - 1]), i + 3); + } + } + delete bpm; + delete dm; + dm = new DiskManager("/tmp/bpt7.db"); + bpm = new BufferPoolManager(20, 3, dm); + { + BPlusTreeIndexer> bpt(bpm); + for (int i = 1; i <= ops; i++) { + ASSERT_EQ(bpt.Get(keys[i - 1]), i + 3); + } + } + delete bpm; + delete dm; } \ No newline at end of file