diff --git a/bpt/include/bpt/bpt.hpp b/bpt/include/bpt/bpt.hpp index 41c8527..5555c3b 100644 --- a/bpt/include/bpt/bpt.hpp +++ b/bpt/include/bpt/bpt.hpp @@ -317,7 +317,6 @@ class BPlusTreeIndexer { if (pos.path.back().second == page_guard.template As()->data.key_count) { // The "entry" to remove is just a past-the-end pointer page_guard.template AsMut()->data.key_count--; - return; } else { // The "entry" to remove is a key-val pair memmove( @@ -325,13 +324,23 @@ class BPlusTreeIndexer { page_guard.template As()->data.p_data + pos.path.back().second + 1, (page_guard.template As()->data.key_count - pos.path.back().second - 1) * sizeof(key_index_pair_t)); page_guard.template AsMut()->data.key_count--; - return; + if (page_guard.template AsMut()->data.key_count + 1 < _ActualDataType::kMaxKeyCount) { + page_guard.template AsMut()->data.p_data[page_guard.template As()->data.key_count].second = + page_guard.template As() + ->data.p_data[page_guard.template As()->data.key_count + 1] + .second; + } else { + page_guard.template AsMut()->data.p_data[page_guard.template As()->data.key_count].second = + page_guard.template As()->data.p_n; + } } if (has_enough_keys) { if (page_guard.template As()->data.page_status & PageStatusType::ROOT && - page_guard.template As()->data.key_count) { + page_guard.template As()->data.key_count == 0) { // special case for the root page root_page_id = page_guard.template As()->data.p_data[0].second; + BasicPageGuard root_page_guard = bpm->FetchPageBasic(root_page_id); + root_page_guard.template AsMut()->data.page_status |= PageStatusType::ROOT; page_id_t page_to_delete = page_guard.PageId(); pos.path.clear(); // all page_guards are invalid now bpm->DeletePage(page_to_delete); @@ -372,10 +381,25 @@ class BPlusTreeIndexer { page_guard.template As()->data.p_data, page_guard.template As()->data.key_count * sizeof(key_index_pair_t)); prev_page_guard.template AsMut()->data.key_count += page_guard.template As()->data.key_count; - prev_page_guard.template AsMut() - ->data.p_data[prev_page_guard.template As()->data.key_count] - .second = - page_guard.template As()->data.p_data[page_guard.template As()->data.key_count].second; + if (page_guard.template As()->data.key_count < _ActualDataType::kMaxKeyCount) { + if (prev_page_guard.template As()->data.key_count < _ActualDataType::kMaxKeyCount) { + prev_page_guard.template AsMut() + ->data.p_data[prev_page_guard.template As()->data.key_count] + .second = + page_guard.template As()->data.p_data[page_guard.template As()->data.key_count].second; + } else { + prev_page_guard.template AsMut()->data.p_n = + page_guard.template As()->data.p_data[page_guard.template As()->data.key_count].second; + } + } else { + if (prev_page_guard.template As()->data.key_count < _ActualDataType::kMaxKeyCount) { + prev_page_guard.template AsMut() + ->data.p_data[prev_page_guard.template As()->data.key_count] + .second = page_guard.template As()->data.p_n; + } else { + prev_page_guard.template AsMut()->data.p_n = page_guard.template As()->data.p_n; + } + } parent_page_guard.template AsMut()->data.p_data[pos.path[pos.path.size() - 2].second - 1].first = prev_page_guard.template As() ->data.p_data[prev_page_guard.template As()->data.key_count - 1] @@ -405,9 +429,11 @@ class BPlusTreeIndexer { page_guard.template AsMut()->data.key_count--; if (has_enough_keys) { if (page_guard.template As()->data.page_status & PageStatusType::ROOT && - page_guard.template As()->data.key_count) { + page_guard.template As()->data.key_count == 0) { // special case for the root page root_page_id = page_guard.template As()->data.p_data[0].second; + BasicPageGuard root_page_guard = bpm->FetchPageBasic(root_page_id); + root_page_guard.template AsMut()->data.page_status |= PageStatusType::ROOT; page_id_t page_to_delete = page_guard.PageId(); pos.path.clear(); // all page_guards are invalid now bpm->DeletePage(page_to_delete); @@ -510,8 +536,8 @@ class BPlusTreeIndexer { .first; page_id_t current_page_id = page_guard.PageId(); pos.path.pop_back(); // page_guard is no longer valid + bpm->DeletePage(current_page_id); if (!is_in_right_skew_path) { - bpm->DeletePage(current_page_id); // we need to update the parent page RemoveEntryAt(pos, true); return; @@ -526,12 +552,18 @@ class BPlusTreeIndexer { if (is_fixing_up_recursive && pos.path[pos.path.size() - 2].second + 1 == parent_page_guard.template As()->data.key_count) { // the next page has past-the-end pointer - if (next_page_guard.template As()->data.key_count == _ActualDataType::kMaxKeyCount) { + size_t intended_dest = next_page_guard.template As()->data.key_count + + page_guard.template As()->data.key_count; + if (intended_dest == _ActualDataType::kMaxKeyCount) { next_page_guard.template AsMut()->data.p_n = - next_page_guard.template As()->data.p_data[_ActualDataType::kMaxKeyCount - 1].second; + next_page_guard.template As() + ->data.p_data[next_page_guard.template As()->data.key_count] + .second; } else { - next_page_guard.template AsMut()->data.p_data[_ActualDataType::kMaxKeyCount].second = - next_page_guard.template As()->data.p_data[_ActualDataType::kMaxKeyCount - 1].second; + next_page_guard.template AsMut()->data.p_data[intended_dest].second = + next_page_guard.template As() + ->data.p_data[next_page_guard.template As()->data.key_count] + .second; } } memmove( @@ -543,8 +575,8 @@ class BPlusTreeIndexer { next_page_guard.template AsMut()->data.key_count += page_guard.template As()->data.key_count; page_id_t current_page_id = page_guard.PageId(); pos.path.pop_back(); // page_guard is no longer valid + bpm->DeletePage(current_page_id); if (!is_in_right_skew_path) { - bpm->DeletePage(current_page_id); // we need to update the parent page RemoveEntryAt(pos, true); return; diff --git a/test/bpt_basic_test.cpp b/test/bpt_basic_test.cpp index 37a5373..d1eab2a 100644 --- a/test/bpt_basic_test.cpp +++ b/test/bpt_basic_test.cpp @@ -773,4 +773,87 @@ TEST(RemoveTest, RM_1) { } delete bpm; delete dm; +} + +TEST(RemoveTest, RM_2) { + const unsigned int RndSeed = testing::GTEST_FLAG(random_seed); + std::mt19937 rnd(1); + const int str_len = 800; + typedef bpt_basic_test::FixLengthString KeyType; + fprintf(stderr, "sizeof(std::pair)=%lu\n", + sizeof(std::pair)); + const std::string db_file_name = "/tmp/bpt16.db"; + remove(db_file_name.c_str()); + std::vector> entries; + const int max_keys = 30; + const int keys_num_to_remove = 15; + for (int i = 1; i <= max_keys; i++) { + KeyType key; + for (size_t j = 0; j < str_len; j++) key.data[j] = 'a' + rnd() % 26; + key.data[8] = '\0'; + entries.push_back(std::make_pair(key, i)); + } + std::sort(entries.begin(), entries.end()); + fprintf(stderr, "The entries are:\n"); + for (int i = 0; i < entries.size(); i++) { + fprintf(stderr, "key[%d]=%s value[%d]=%d\n", i, entries[i].first.data, i, entries[i].second); + } + DiskManager *dm = new DiskManager(db_file_name.c_str()); + BufferPoolManager *bpm = new BufferPoolManager(20, 3, dm); + { + BPlusTreeIndexer> bpt(bpm); + for (int i = 1; i <= max_keys; i++) { + bpt.Put(entries[i - 1].first, entries[i - 1].second); + } + for (int i = 1; i <= keys_num_to_remove; i++) { + int id = rnd() % entries.size(); + fprintf(stderr, "removing key[%d]=%s value[%d]=%d\n", id, entries[id].first.data, id, entries[id].second); + bpt.Remove(entries[id].first); + entries.erase(entries.begin() + id); + ASSERT_EQ(bpt.Size(), entries.size()); + } + ASSERT_EQ(bpt.Size(), max_keys - keys_num_to_remove); + for (int i = 0; i < entries.size(); i++) { + ASSERT_EQ(bpt.Get(entries[i].first), entries[i].second); + } + } + delete bpm; + delete dm; + dm = new DiskManager(db_file_name.c_str()); + bpm = new BufferPoolManager(20, 3, dm); + { + BPlusTreeIndexer> bpt(bpm); + for (int i = 0; i < entries.size(); i++) { + ASSERT_EQ(bpt.Get(entries[i].first), entries[i].second); + } + } + delete bpm; + delete dm; + dm = new DiskManager(db_file_name.c_str()); + bpm = new BufferPoolManager(20, 3, dm); + { + BPlusTreeIndexer> bpt(bpm); + ASSERT_EQ(bpt.Size(), entries.size()); + for (int i = 0; i < entries.size(); i++) { + ASSERT_EQ(bpt.Get(entries[i].first), entries[i].second); + } + sort(entries.begin(), entries.end()); + for (int i = 0; i < entries.size(); i++) { + ASSERT_EQ(bpt.Get(entries[i].first), entries[i].second); + } + auto it_std = entries.begin(); + auto it_bpt = bpt.lower_bound_const(entries[0].first); + for (int i = 0; i < entries.size(); i++) { + fprintf(stderr, "i=%d checking key[%d]=%s value[%d]=%d\n", i, i, it_std->first.data, i, it_std->second); + ASSERT_TRUE(!(it_bpt == bpt.end_const())); + ASSERT_EQ(it_bpt.GetKey(), it_std->first); + ASSERT_EQ(it_bpt.GetValue(), it_std->second); + ++it_bpt; + it_std++; + } + ASSERT_TRUE(it_bpt == bpt.end_const()); + ASSERT_EQ(bpt.Size(), entries.size()); + } + delete bpm; + delete dm; } \ No newline at end of file