diff --git a/bpt/include/bpt/bpt.hpp b/bpt/include/bpt/bpt.hpp index 152db96..607e63e 100644 --- a/bpt/include/bpt/bpt.hpp +++ b/bpt/include/bpt/bpt.hpp @@ -4,6 +4,7 @@ #include #include #include +#include #include #include #include "bpt/bpt_page.hpp" @@ -36,6 +37,7 @@ class BPlusTreeIndexer { static auto comparer_for_key_index_pair = [](const key_index_pair_t &a, const KeyType &b) { return key_cmp(a.first, b); }; + // fprintf(stderr, "current page has %u keys\n", current_page_guard.As()->data.key_count); in_page_key_count_t nxt = std::lower_bound(current_page_guard.As()->data.p_data, current_page_guard.As()->data.p_data + current_page_guard.As()->data.key_count, @@ -427,6 +429,11 @@ class BPlusTreeIndexer { page_guard.template As()->data.p_data + pos.path.back().second + 1, (page_guard.template As()->data.key_count - pos.path.back().second - 1) * sizeof(key_index_pair_t)); page_guard.template AsMut()->data.key_count--; + if (pos.path.size() >= 2 && page_guard.template AsMut()->data.key_count == pos.path.back().second) { + auto &parent_page_guard = pos.path[pos.path.size() - 2].first; + parent_page_guard.template AsMut()->data.p_data[pos.path[pos.path.size() - 2].second].first = + page_guard.template As()->data.p_data[page_guard.template As()->data.key_count - 1].first; + } if (has_enough_keys) { if (page_guard.template As()->data.page_status & PageStatusType::ROOT && page_guard.template As()->data.key_count == 0) { @@ -679,6 +686,36 @@ class BPlusTreeIndexer { return *this; } }; + // void DfsCheckIndex(page_id_t cur, KeyType right_bound, bool check_right_bound) { + // BasicPageGuard guard = bpm->FetchPageBasic(cur); + // if (check_right_bound) { + // if (guard.template As()->data.p_data[guard.template As()->data.key_count - 1].first != + // right_bound) { + // throw std::runtime_error("Index is not sorted!"); + // } + // } + // if (guard.template As()->data.page_status & PageStatusType::LEAF) { + // return; + // } + // for (int i = 0; i < guard.template As()->data.key_count; i++) { + // DfsCheckIndex(guard.template As()->data.p_data[i].second, + // guard.template As()->data.p_data[i].first, true); + // } + // if (!check_right_bound) { + // int past_the_end_pointer; + // if (guard.template As()->data.key_count < _ActualDataType::kMaxKeyCount) { + // past_the_end_pointer = + // guard.template As()->data.p_data[guard.template As()->data.key_count].second; + // } else { + // past_the_end_pointer = guard.template As()->data.p_n; + // } + // DfsCheckIndex(past_the_end_pointer, KeyType(), false); + // } + // } + // void CheckIndex() { + // if (siz == 0) return; + // DfsCheckIndex(root_page_id, KeyType(), false); + // } BPlusTreeIndexer() = delete; BPlusTreeIndexer(const BPlusTreeIndexer &) = delete; BPlusTreeIndexer(BPlusTreeIndexer &&) = delete; diff --git a/test/CMakeLists.txt b/test/CMakeLists.txt index c84657f..7c03f94 100644 --- a/test/CMakeLists.txt +++ b/test/CMakeLists.txt @@ -12,4 +12,10 @@ target_link_libraries(page_guard_test bpt GTest::gtest_main) add_executable(bpt_basic_test bpt_basic_test.cpp) target_link_libraries(bpt_basic_test bpt GTest::gtest_main spdlog::spdlog) add_executable(buffer_pool_manager_extreme_test buffer_pool_manager_extreme_test.cpp) -target_link_libraries(buffer_pool_manager_extreme_test bpt) \ No newline at end of file +target_link_libraries(buffer_pool_manager_extreme_test bpt) +add_executable(t1_std t1_std.cpp) +set_target_properties(t1_std PROPERTIES RUNTIME_OUTPUT_DIRECTORY ${CMAKE_BINARY_DIR}) +add_executable(t1_mk t1_mk.cpp) +set_target_properties(t1_mk PROPERTIES RUNTIME_OUTPUT_DIRECTORY ${CMAKE_BINARY_DIR}) +add_executable(bpt_advanced_test bpt_advanced_test.cpp) +target_link_libraries(bpt_advanced_test bpt GTest::gtest_main spdlog::spdlog) \ No newline at end of file diff --git a/test/bpt_advanced_test.cpp b/test/bpt_advanced_test.cpp new file mode 100644 index 0000000..eaf6963 --- /dev/null +++ b/test/bpt_advanced_test.cpp @@ -0,0 +1,211 @@ +#include +#include +#include +#include +#include +#include +#include +#include "bpt/bpt.hpp" +#include "bpt/buffer_pool_manager.h" +#include "bpt/config.h" +#include "bpt/disk_manager.h" +namespace bpt_advanced_test { +template +class FixLengthString { + public: + char data[length]; + bool operator<(const FixLengthString &that) const { + for (size_t i = 0; i < length; i++) { + if (data[i] < that.data[i]) return true; + if (data[i] > that.data[i]) return false; + } + return false; + } + bool operator==(const FixLengthString &that) const { + for (size_t i = 0; i < length; i++) { + if (data[i] != that.data[i]) return false; + } + return true; + } +}; +} // namespace bpt_advanced_test +TEST(STRING, huge_size_1) { + const unsigned int RndSeed = testing::GTEST_FLAG(random_seed); + std::mt19937 rnd(RndSeed); + const int str_len = 10; + typedef bpt_advanced_test::FixLengthString KeyType; + fprintf(stderr, "sizeof(std::pair)=%lu\n", + sizeof(std::pair)); + const std::string db_file_name = "/tmp/bpt17.db"; + remove(db_file_name.c_str()); + std::vector> entries; + const int max_keys = 1000; + const int keys_num_to_remove = 990; + for (int i = 1; i <= max_keys; i++) { + KeyType key; + for (size_t j = 0; j < str_len; j++) key.data[j] = 'a' + rnd() % 26; + key.data[str_len - 1] = '\0'; + entries.push_back(std::make_pair(key, i)); + } + // std::sort(entries.begin(), entries.end()); + std::shuffle(entries.begin(), entries.end(), rnd); + fprintf(stderr, "The entries are:\n"); + for (int i = 0; i < entries.size(); i++) { + fprintf(stderr, "key[%d]=%s value[%d]=%d\n", i, entries[i].first.data, i, entries[i].second); + } + DiskManager *dm = new DiskManager(db_file_name.c_str()); + BufferPoolManager *bpm = new BufferPoolManager(20, 3, dm); + { + BPlusTreeIndexer> bpt(bpm); + for (int i = 1; i <= max_keys; i++) { + bpt.Put(entries[i - 1].first, entries[i - 1].second); + } + for (int i = 1; i <= keys_num_to_remove; i++) { + // { + // // checking iteration + // auto it_std = entries.begin(); + // auto it_bpt = bpt.lower_bound_const(entries[0].first); + // for (int i = 0; i < entries.size(); i++) { + // fprintf(stderr, "i=%d checking key[%d]=%s value[%d]=%d\n", i, i, it_std->first.data, i, it_std->second); + // ASSERT_TRUE(!(it_bpt == bpt.end_const())); + // ASSERT_EQ(it_bpt.GetKey(), it_std->first); + // ASSERT_EQ(it_bpt.GetValue(), it_std->second); + // ++it_bpt; + // it_std++; + // } + // ASSERT_TRUE(it_bpt == bpt.end_const()); + // ASSERT_EQ(bpt.Size(), entries.size()); + // } + int id = rnd() % entries.size(); + fprintf(stderr, "removing key[%d]=%s value[%d]=%d\n", id, entries[id].first.data, id, entries[id].second); + bpt.Remove(entries[id].first); + entries.erase(entries.begin() + id); + ASSERT_EQ(bpt.Size(), entries.size()); + for (int j = 0; j < entries.size(); j++) { + ASSERT_EQ(bpt.Get(entries[j].first), entries[j].second); + } + { + // checking iteration + std::sort(entries.begin(), entries.end()); + auto it_std = entries.begin(); + auto it_bpt = bpt.lower_bound_const(entries[0].first); + for (int i = 0; i < entries.size(); i++) { + fprintf(stderr, "i=%d checking key[%d]=%s value[%d]=%d\n", i, i, it_std->first.data, i, it_std->second); + ASSERT_TRUE(!(it_bpt == bpt.end_const())); + ASSERT_EQ(it_bpt.GetKey(), it_std->first); + ASSERT_EQ(it_bpt.GetValue(), it_std->second); + ++it_bpt; + it_std++; + } + ASSERT_TRUE(it_bpt == bpt.end_const()); + ASSERT_EQ(bpt.Size(), entries.size()); + } + } + ASSERT_EQ(bpt.Size(), max_keys - keys_num_to_remove); + for (int i = 0; i < entries.size(); i++) { + ASSERT_EQ(bpt.Get(entries[i].first), entries[i].second); + } + } + delete bpm; + delete dm; + dm = new DiskManager(db_file_name.c_str()); + bpm = new BufferPoolManager(20, 3, dm); + { + BPlusTreeIndexer> bpt(bpm); + for (int i = 0; i < entries.size(); i++) { + ASSERT_EQ(bpt.Get(entries[i].first), entries[i].second); + } + } + delete bpm; + delete dm; + dm = new DiskManager(db_file_name.c_str()); + bpm = new BufferPoolManager(20, 3, dm); + { + BPlusTreeIndexer> bpt(bpm); + ASSERT_EQ(bpt.Size(), entries.size()); + for (int i = 0; i < entries.size(); i++) { + ASSERT_EQ(bpt.Get(entries[i].first), entries[i].second); + } + sort(entries.begin(), entries.end()); + for (int i = 0; i < entries.size(); i++) { + ASSERT_EQ(bpt.Get(entries[i].first), entries[i].second); + } + auto it_std = entries.begin(); + auto it_bpt = bpt.lower_bound_const(entries[0].first); + for (int i = 0; i < entries.size(); i++) { + fprintf(stderr, "i=%d checking key[%d]=%s value[%d]=%d\n", i, i, it_std->first.data, i, it_std->second); + ASSERT_TRUE(!(it_bpt == bpt.end_const())); + ASSERT_EQ(it_bpt.GetKey(), it_std->first); + ASSERT_EQ(it_bpt.GetValue(), it_std->second); + ++it_bpt; + it_std++; + } + ASSERT_TRUE(it_bpt == bpt.end_const()); + ASSERT_EQ(bpt.Size(), entries.size()); + } + delete bpm; + delete dm; +} + +TEST(LONGLONG, huge_size_1) { + const unsigned int RndSeed = testing::GTEST_FLAG(random_seed); + std::mt19937 rnd(RndSeed); + typedef long long KeyType; + const std::string db_file_name = "/tmp/bpt18.db"; + remove(db_file_name.c_str()); + std::map entries; + const int max_keys = 10000; + const int keys_num_to_remove = 9900; + for (int i = 1; i <= max_keys; i++) { + KeyType key = rnd(); + entries[key] = i; + } + DiskManager *dm = new DiskManager(db_file_name.c_str()); + BufferPoolManager *bpm = new BufferPoolManager(20, 3, dm); + { + BPlusTreeIndexer> bpt(bpm); + for (auto &entry : entries) { + bpt.Put(entry.first, entry.second); + } + for (int i = 1; i <= keys_num_to_remove; i++) { + if (rnd() % 2 == 0) { + int id = rnd() % entries.size(); + auto it = entries.begin(); + for (int j = 0; j < id; j++) it++; + fprintf(stderr, "removing key=%lld value=%d\n", it->first, it->second); + bpt.Remove(it->first); + entries.erase(it); + } else { + // Put + KeyType key = rnd(); + int value = rnd(); + fprintf(stderr, "inserting key=%lld value=%d\n", key, value); + bpt.Put(key, value); + entries[key] = value; + } + ASSERT_EQ(bpt.Size(), entries.size()); + for (auto &entry : entries) { + ASSERT_EQ(bpt.Get(entry.first), entry.second); + } + { + // checking iteration + auto it_std = entries.begin(); + auto it_bpt = bpt.lower_bound_const(entries.begin()->first); + for (int i = 0; i < entries.size(); i++) { + fprintf(stderr, "i=%d checking key=%lld value=%d\n", i, it_std->first, it_std->second); + ASSERT_TRUE(!(it_bpt == bpt.end_const())); + ASSERT_EQ(it_bpt.GetKey(), it_std->first); + ASSERT_EQ(it_bpt.GetValue(), it_std->second); + ++it_bpt; + it_std++; + } + ASSERT_TRUE(it_bpt == bpt.end_const()); + ASSERT_EQ(bpt.Size(), entries.size()); + } + } + ASSERT_EQ(bpt.Size(), entries.size()); + for (auto &entry : entries) { + ASSERT_EQ(bpt.Get(entry.first), entry.second); + } + } +} \ No newline at end of file diff --git a/test/bpt_basic_test.cpp b/test/bpt_basic_test.cpp index 1a13abd..009715f 100644 --- a/test/bpt_basic_test.cpp +++ b/test/bpt_basic_test.cpp @@ -777,7 +777,7 @@ TEST(RemoveTest, RM_1) { TEST(RemoveTest, RM_2) { const unsigned int RndSeed = testing::GTEST_FLAG(random_seed); - std::mt19937 rnd(1); + std::mt19937 rnd(RndSeed); const int str_len = 800; typedef bpt_basic_test::FixLengthString KeyType; fprintf(stderr, "sizeof(std::pair)=%lu\n", diff --git a/test/t1_mk.cpp b/test/t1_mk.cpp new file mode 100644 index 0000000..6a30e2b --- /dev/null +++ b/test/t1_mk.cpp @@ -0,0 +1,64 @@ +#include +using namespace std; +const unsigned int RndSeed = random_device{}(); +mt19937 rnd(RndSeed); +int rnd_less(int n) { return rnd() % n; } +const int kMaxN = 5e4 + 10; +int fa[kMaxN]; +inline int ff(int u) { + int x = u, y; + while (fa[u] != u) u = fa[u]; + while (x != u) { + y = fa[x]; + fa[x] = u; + x = y; + } + return u; +} +int main(int argc, char *argv[]) { + cerr << "[ Data Generator: Seed = " << RndSeed << " ]" << endl; + FILE *fout = fopen("GeneratorSeed.txt", "a"); + fprintf(fout, "Seed = %u\n", RndSeed); + fclose(fout); + // ====================================== + int n = 1000; + int total_keys = 300; + set keys_set; + for (int i = 0; i < total_keys; i++) { + string key = "#" + to_string(rnd_less(1000000)) + "#"; + keys_set.insert(key); + } + vector keys_vec; + unordered_map> mp; + for (auto &key : keys_set) { + keys_vec.push_back(key); + } + cout << n << endl; + for (int i = 0; i < n; i++) { + int tmp = rnd() % 10; + if (tmp <= 4) { + string key = keys_vec[rnd_less(keys_vec.size())]; + int val = rnd_less(1000000); + cout << "insert " << key << " " << val << "\n"; + // 往 mp[key] 有序地插入 + auto it = lower_bound(mp[key].begin(), mp[key].end(), val); + if (it == mp[key].end() || *it != val) mp[key].insert(it, val); + } else if (tmp <= 6) { + string key = keys_vec[rnd_less(keys_vec.size())]; + int val = rnd_less(1000000); + if (rnd() % 2 == 0 && mp[key].size() > 0) { + // 选择一个有意义的删除项 + val = mp[key][rnd_less(mp[key].size())]; + } + cout << "delete " << key << " " << val << "\n"; + // 从 mp[key] 中删除 val + auto it = lower_bound(mp[key].begin(), mp[key].end(), val); + if (it != mp[key].end() && *it == val) mp[key].erase(it); + if (mp[key].empty()) mp.erase(key); + } else { + string key = keys_vec[rnd_less(keys_vec.size())]; + cout << "find " << key << "\n"; + } + } + return 0; +} \ No newline at end of file diff --git a/test/t1_std.cpp b/test/t1_std.cpp index aa14d9e..f113a96 100644 --- a/test/t1_std.cpp +++ b/test/t1_std.cpp @@ -1,4 +1,4 @@ -// 此程序仅用于对拍 +// 此程序仅用于对拍,并测试是否存在哈希冲突 #include #include #include @@ -6,7 +6,6 @@ #include #include #include -#include "bpt/disk_manager.h" typedef uint64_t hash_t; inline hash_t Hash(std::string str) noexcept { constexpr static char salt1[10] = "mL;]-=eT";