ready to write full right_skew_path fixup support

This commit is contained in:
2024-04-28 07:32:42 +00:00
parent 617b366d90
commit 91fdd2915f
4 changed files with 133 additions and 8 deletions

View File

@ -64,7 +64,8 @@ class BPlusTreeIndexer {
res.is_end = false; res.is_end = false;
return res; return res;
} }
void InsertEntryAt(PositionSignType &pos, const KeyType &key, b_plus_tree_value_index_t value) { void InsertEntryAt(PositionSignType &pos, const KeyType &key, b_plus_tree_value_index_t value,
bool is_fixing_up_recursive = false) {
fprintf(stderr, "_ActualDataType::kMaxKeyCount = %d\n", (int)_ActualDataType::kMaxKeyCount); fprintf(stderr, "_ActualDataType::kMaxKeyCount = %d\n", (int)_ActualDataType::kMaxKeyCount);
if (siz == 0) { if (siz == 0) {
// special case for the first entry // special case for the first entry
@ -73,7 +74,7 @@ class BPlusTreeIndexer {
new_page_guard.AsMut<PageType>()->data.key_count = 1; new_page_guard.AsMut<PageType>()->data.key_count = 1;
new_page_guard.AsMut<PageType>()->data.p_data[0] = std::make_pair(key, value); new_page_guard.AsMut<PageType>()->data.p_data[0] = std::make_pair(key, value);
new_page_guard.AsMut<PageType>()->data.p_n = 0; new_page_guard.AsMut<PageType>()->data.p_n = 0;
++siz; if (!is_fixing_up_recursive) ++siz;
return; return;
} }
auto &page_guard = pos.path.back().first; auto &page_guard = pos.path.back().first;
@ -86,7 +87,7 @@ class BPlusTreeIndexer {
page_guard.template AsMut<PageType>()->data.key_count++; page_guard.template AsMut<PageType>()->data.key_count++;
fprintf(stderr, "page_guard.template As<PageType>()->data.key_count = %d\n", fprintf(stderr, "page_guard.template As<PageType>()->data.key_count = %d\n",
(int)page_guard.template As<PageType>()->data.key_count); (int)page_guard.template As<PageType>()->data.key_count);
++siz; if (!is_fixing_up_recursive) ++siz;
return; return;
} }
// In our case, the tree is not too high, so we do not consider borrowing from siblings, we just split the page. // In our case, the tree is not too high, so we do not consider borrowing from siblings, we just split the page.
@ -96,7 +97,10 @@ class BPlusTreeIndexer {
page_id_t new_page_id; page_id_t new_page_id;
BasicPageGuard new_page_guard = bpm->NewPageGuarded(&new_page_id); BasicPageGuard new_page_guard = bpm->NewPageGuarded(&new_page_id);
// Then move the last kMinNumberOfKeysForLeaf keys(including newly inserted) to the new page // Then move the last kMinNumberOfKeysForLeaf keys(including newly inserted) to the new page
if (!is_fixing_up_recursive)
new_page_guard.AsMut<PageType>()->data.page_status = PageStatusType::LEAF; new_page_guard.AsMut<PageType>()->data.page_status = PageStatusType::LEAF;
else
new_page_guard.AsMut<PageType>()->data.page_status = 0; // PageStatusType::INTERNAL;
new_page_guard.AsMut<PageType>()->data.key_count = _ActualDataType::kMinNumberOfKeysForLeaf; new_page_guard.AsMut<PageType>()->data.key_count = _ActualDataType::kMinNumberOfKeysForLeaf;
page_guard.template AsMut<PageType>()->data.key_count -= _ActualDataType::kMinNumberOfKeysForLeaf; page_guard.template AsMut<PageType>()->data.key_count -= _ActualDataType::kMinNumberOfKeysForLeaf;
new_page_guard.AsMut<PageType>()->data.p_n = page_guard.template As<PageType>()->data.p_n; new_page_guard.AsMut<PageType>()->data.p_n = page_guard.template As<PageType>()->data.p_n;
@ -131,7 +135,7 @@ class BPlusTreeIndexer {
page_guard.template AsMut<PageType>()->data.key_count++; page_guard.template AsMut<PageType>()->data.key_count++;
} }
if (page_guard.template As<PageType>()->data.page_status & PageStatusType::ROOT) { if (page_guard.template As<PageType>()->data.page_status & PageStatusType::ROOT) {
// special case for the root page // special case for the root page being splited
page_guard.template AsMut<PageType>()->data.page_status &= ~PageStatusType::ROOT; page_guard.template AsMut<PageType>()->data.page_status &= ~PageStatusType::ROOT;
BasicPageGuard new_root_page_guard = bpm->NewPageGuarded(&root_page_id); BasicPageGuard new_root_page_guard = bpm->NewPageGuarded(&root_page_id);
new_root_page_guard.AsMut<PageType>()->data.page_status = PageStatusType::ROOT; new_root_page_guard.AsMut<PageType>()->data.page_status = PageStatusType::ROOT;
@ -140,12 +144,88 @@ class BPlusTreeIndexer {
page_guard.template As<PageType>()->data.p_data[page_guard.template As<PageType>()->data.key_count - 1].first, page_guard.template As<PageType>()->data.p_data[page_guard.template As<PageType>()->data.key_count - 1].first,
page_guard.PageId()); page_guard.PageId());
new_root_page_guard.AsMut<PageType>()->data.p_data[1] = std::make_pair(KeyType(), new_page_id); new_root_page_guard.AsMut<PageType>()->data.p_data[1] = std::make_pair(KeyType(), new_page_id);
++siz; if (!is_fixing_up_recursive) ++siz;
fprintf(stderr, "new_page_guard.AsMut<PageType>()->data.key_count = %d\n", fprintf(stderr, "new_page_guard.AsMut<PageType>()->data.key_count = %d\n",
(int)new_page_guard.AsMut<PageType>()->data.key_count); (int)new_page_guard.AsMut<PageType>()->data.key_count);
return; return;
} }
assert(pos.path.size() >= 2);
auto &parent_page_guard = pos.path[pos.path.size() - 2].first;
bool is_in_right_skew_path = false;
if (pos.path[pos.path.size() - 2].second == parent_page_guard.template As<PageType>()->data.key_count) {
is_in_right_skew_path = true;
}
if (pos.path.size() == 2 || pos.path[pos.path.size() - 3].second ==
pos.path[pos.path.size() - 3].first.template As<PageType>()->data.key_count) {
is_in_right_skew_path = true;
}
if (is_in_right_skew_path) {
do {
parent_page_guard.template AsMut<PageType>()->data.p_data[pos.path[pos.path.size() - 2].second].first =
page_guard.template As<PageType>()
->data.p_data[page_guard.template As<PageType>()->data.key_count - 1]
.first;
pos.path[pos.path.size() - 2].second++;
// now check we are able to "insert" (new_page_guard.template
// As<PageType>()->data.p_data[new_page_guard.template As<PageType>()->data.key_count - 1].first, new_page_id)
// at pos
if (parent_page_guard.template As<PageType>()->data.key_count < _ActualDataType::kMaxKeyCount) {
// Has enough space, reach end, just insert it
// first, manually move the last pointer
if (parent_page_guard.template As<PageType>()->data.key_count == _ActualDataType::kMaxKeyCount - 1) {
parent_page_guard.template AsMut<PageType>()->data.p_n =
parent_page_guard.template As<PageType>()
->data.p_data[parent_page_guard.template As<PageType>()->data.key_count]
.second;
} else {
parent_page_guard.template AsMut<PageType>()
->data.p_data[parent_page_guard.template As<PageType>()->data.key_count + 1]
.second = parent_page_guard.template As<PageType>()
->data.p_data[parent_page_guard.template As<PageType>()->data.key_count]
.second;
}
// Then, use memmove to move the key_point pairs
fprintf(stderr, "parent_page_guard.template As<PageType>()->data.key_count = %d\n",
(int)parent_page_guard.template As<PageType>()->data.key_count);
if (pos.path[pos.path.size() - 2].second < parent_page_guard.template As<PageType>()->data.key_count) {
memmove(
parent_page_guard.template AsMut<PageType>()->data.p_data + pos.path[pos.path.size() - 2].second + 1,
parent_page_guard.template As<PageType>()->data.p_data + pos.path[pos.path.size() - 2].second,
(parent_page_guard.template As<PageType>()->data.key_count - pos.path[pos.path.size() - 2].second) *
sizeof(key_index_pair_t));
}
// Then Set the key_point pair
if (pos.path[pos.path.size() - 2].second < _ActualDataType::kMaxKeyCount) {
parent_page_guard.template AsMut<PageType>()->data.p_data[pos.path[pos.path.size() - 2].second] =
std::make_pair(new_page_guard.template As<PageType>()
->data.p_data[new_page_guard.template As<PageType>()->data.key_count - 1]
.first,
new_page_id);
} else {
// just set p_n
parent_page_guard.template AsMut<PageType>()->data.p_n = new_page_id;
}
parent_page_guard.template AsMut<PageType>()->data.key_count++;
break;
}
// TODO: process and prepare for next round
throw std::runtime_error("Not implemented yet: InsertEntryAt"); throw std::runtime_error("Not implemented yet: InsertEntryAt");
} while (true);
if (!is_fixing_up_recursive) ++siz;
return;
}
parent_page_guard.template AsMut<PageType>()->data.p_data[pos.path[pos.path.size() - 2].second].first =
page_guard.template As<PageType>()->data.p_data[page_guard.template As<PageType>()->data.key_count - 1].first;
pos.path[pos.path.size() - 2].second++;
pos.path.pop_back();
fprintf(stderr, "begin processing recursively\n");
InsertEntryAt(pos,
new_page_guard.template As<PageType>()
->data.p_data[new_page_guard.template As<PageType>()->data.key_count - 1]
.first,
new_page_id, true);
if (!is_fixing_up_recursive) ++siz;
return;
} }
void RemoveEntryAt(PositionSignType &pos) { void RemoveEntryAt(PositionSignType &pos) {
if (siz == 1) { if (siz == 1) {

View File

@ -10,7 +10,6 @@ struct ActualDataType {
in_page_key_count_t key_count; in_page_key_count_t key_count;
const static size_t kMaxKeyCount = const static size_t kMaxKeyCount =
(kPageSize - sizeof(page_id_t) - sizeof(page_status_t) - sizeof(in_page_key_count_t)) / sizeof(value_type); (kPageSize - sizeof(page_id_t) - sizeof(page_status_t) - sizeof(in_page_key_count_t)) / sizeof(value_type);
const static size_t kMinNumberOfKeysForInternal = (kMaxKeyCount) / 2;
const static size_t kMinNumberOfKeysForLeaf = (kMaxKeyCount + 1) / 2; const static size_t kMinNumberOfKeysForLeaf = (kMaxKeyCount + 1) / 2;
value_type p_data[kMaxKeyCount]; value_type p_data[kMaxKeyCount];
static_assert(kMaxKeyCount >= 2, "kMaxKeyCount must be greater than or equal to 2"); static_assert(kMaxKeyCount >= 2, "kMaxKeyCount must be greater than or equal to 2");

View File

@ -1,2 +1,3 @@
#include "bpt/bpt.hpp"
#include "bpt/config.h" #include "bpt/config.h"
const b_plus_tree_value_index_t kInvalidValueIndex = -1; const b_plus_tree_value_index_t kInvalidValueIndex = -1;

View File

@ -342,3 +342,48 @@ TEST(HarderTest, Split_in_Put_Harder_2) {
delete bpm; delete bpm;
delete dm; delete dm;
} }
// Inserts 20 random 800-byte keys in shuffled order, checking every Put with an
// immediate Get, re-reads all keys, then reopens the same database file with a
// fresh DiskManager/BufferPoolManager and verifies that every value is still there.
// NOTE(review): the large key size is presumably chosen so that only a few entries
// fit in one page and Put has to split pages repeatedly — confirm against kPageSize.
TEST(HarderTest, Split_in_Put_Harder_3) {
  // Seed from the gtest flag so a failing run can be reproduced with --gtest_random_seed.
  const unsigned int RndSeed = testing::GTEST_FLAG(random_seed);
  std::mt19937 rnd(RndSeed);
  const int str_len = 800;
  using KeyType = bpt_basic_test::FixLengthString<str_len>;
  // sizeof yields size_t, so the matching conversion is %zu (not %lu).
  fprintf(stderr, "sizeof(std::pair<KeyType, default_numeric_index_t>)=%zu\n",
          sizeof(std::pair<KeyType, default_numeric_index_t>));
  const std::string db_file_name = "/tmp/bpt8.db";
  std::vector<KeyType> keys;
  const int ops = 20;
  // Start from a clean file so data left by earlier runs cannot influence the result.
  remove(db_file_name.c_str());
  // NOTE(review): a failing ASSERT_* returns from the test body immediately and
  // therefore skips the delete calls below; the manual new/delete is kept to match
  // the neighbouring tests.
  DiskManager *dm = new DiskManager(db_file_name.c_str());
  BufferPoolManager *bpm = new BufferPoolManager(20, 3, dm);
  for (int i = 1; i <= ops; i++) {
    KeyType key;
    // Fill with random lowercase letters; the index is int to match str_len's type.
    for (int j = 0; j < str_len; j++) key.data[j] = 'a' + rnd() % 26;
    key.data[str_len - 1] = '\0';
    keys.push_back(key);
  }
  // sort(keys.begin(), keys.end());
  std::shuffle(keys.begin(), keys.end(), rnd);
  {
    // Phase 1: insert, verifying each key right after its own Put ...
    BPlusTreeIndexer<KeyType, std::less<KeyType>> bpt(bpm);
    for (int i = 1; i <= ops; i++) {
      bpt.Put(keys[i - 1], i + 3);
      ASSERT_EQ(bpt.Get(keys[i - 1]), i + 3);
    }
    // ... and then all keys again once every insertion has been done.
    for (int i = 1; i <= ops; i++) {
      ASSERT_EQ(bpt.Get(keys[i - 1]), i + 3);
    }
  }
  // The indexer has gone out of scope above; now tear down the pool and the disk manager.
  delete bpm;
  delete dm;
  // Phase 2: reopen the same file and check that the data can be read back.
  dm = new DiskManager(db_file_name.c_str());
  bpm = new BufferPoolManager(20, 3, dm);
  {
    BPlusTreeIndexer<KeyType, std::less<KeyType>> bpt(bpm);
    for (int i = 1; i <= ops; i++) {
      ASSERT_EQ(bpt.Get(keys[i - 1]), i + 3);
    }
  }
  delete bpm;
  delete dm;
}