fixed a severe index error

This commit is contained in:
2024-04-30 02:34:52 +00:00
parent 0e9a7b2372
commit a037bf97cb
6 changed files with 321 additions and 4 deletions

View File

@ -4,6 +4,7 @@
#include <cassert>
#include <cstdio>
#include <cstring>
#include <queue>
#include <shared_mutex>
#include <vector>
#include "bpt/bpt_page.hpp"
@ -36,6 +37,7 @@ class BPlusTreeIndexer {
static auto comparer_for_key_index_pair = [](const key_index_pair_t &a, const KeyType &b) {
return key_cmp(a.first, b);
};
// fprintf(stderr, "current page has %u keys\n", current_page_guard.As<PageType>()->data.key_count);
in_page_key_count_t nxt = std::lower_bound(current_page_guard.As<PageType>()->data.p_data,
current_page_guard.As<PageType>()->data.p_data +
current_page_guard.As<PageType>()->data.key_count,
@ -427,6 +429,11 @@ class BPlusTreeIndexer {
page_guard.template As<PageType>()->data.p_data + pos.path.back().second + 1,
(page_guard.template As<PageType>()->data.key_count - pos.path.back().second - 1) * sizeof(key_index_pair_t));
page_guard.template AsMut<PageType>()->data.key_count--;
if (pos.path.size() >= 2 && page_guard.template AsMut<PageType>()->data.key_count == pos.path.back().second) {
auto &parent_page_guard = pos.path[pos.path.size() - 2].first;
parent_page_guard.template AsMut<PageType>()->data.p_data[pos.path[pos.path.size() - 2].second].first =
page_guard.template As<PageType>()->data.p_data[page_guard.template As<PageType>()->data.key_count - 1].first;
}
if (has_enough_keys) {
if (page_guard.template As<PageType>()->data.page_status & PageStatusType::ROOT &&
page_guard.template As<PageType>()->data.key_count == 0) {
@ -679,6 +686,36 @@ class BPlusTreeIndexer {
return *this;
}
};
// void DfsCheckIndex(page_id_t cur, KeyType right_bound, bool check_right_bound) {
// BasicPageGuard guard = bpm->FetchPageBasic(cur);
// if (check_right_bound) {
// if (guard.template As<PageType>()->data.p_data[guard.template As<PageType>()->data.key_count - 1].first !=
// right_bound) {
// throw std::runtime_error("Index is not sorted!");
// }
// }
// if (guard.template As<PageType>()->data.page_status & PageStatusType::LEAF) {
// return;
// }
// for (int i = 0; i < guard.template As<PageType>()->data.key_count; i++) {
// DfsCheckIndex(guard.template As<PageType>()->data.p_data[i].second,
// guard.template As<PageType>()->data.p_data[i].first, true);
// }
// if (!check_right_bound) {
// int past_the_end_pointer;
// if (guard.template As<PageType>()->data.key_count < _ActualDataType::kMaxKeyCount) {
// past_the_end_pointer =
// guard.template As<PageType>()->data.p_data[guard.template As<PageType>()->data.key_count].second;
// } else {
// past_the_end_pointer = guard.template As<PageType>()->data.p_n;
// }
// DfsCheckIndex(past_the_end_pointer, KeyType(), false);
// }
// }
// void CheckIndex() {
// if (siz == 0) return;
// DfsCheckIndex(root_page_id, KeyType(), false);
// }
BPlusTreeIndexer() = delete;
BPlusTreeIndexer(const BPlusTreeIndexer &) = delete;
BPlusTreeIndexer(BPlusTreeIndexer &&) = delete;

View File

@ -12,4 +12,10 @@ target_link_libraries(page_guard_test bpt GTest::gtest_main)
add_executable(bpt_basic_test bpt_basic_test.cpp)
target_link_libraries(bpt_basic_test bpt GTest::gtest_main spdlog::spdlog)
add_executable(buffer_pool_manager_extreme_test buffer_pool_manager_extreme_test.cpp)
target_link_libraries(buffer_pool_manager_extreme_test bpt)
target_link_libraries(buffer_pool_manager_extreme_test bpt)
# t1_std and t1_mk are standalone executables (no libraries are linked to them
# here); their binaries are emitted into the top-level build directory.
add_executable(t1_std t1_std.cpp)
set_target_properties(t1_std PROPERTIES RUNTIME_OUTPUT_DIRECTORY ${CMAKE_BINARY_DIR})
add_executable(t1_mk t1_mk.cpp)
set_target_properties(t1_mk PROPERTIES RUNTIME_OUTPUT_DIRECTORY ${CMAKE_BINARY_DIR})
# Advanced B+ tree tests, linked against bpt, GoogleTest's main and spdlog.
add_executable(bpt_advanced_test bpt_advanced_test.cpp)
target_link_libraries(bpt_advanced_test bpt GTest::gtest_main spdlog::spdlog)

211
test/bpt_advanced_test.cpp Normal file
View File

@ -0,0 +1,211 @@
#include <gtest/gtest.h>
#include <spdlog/async.h>
#include <spdlog/sinks/basic_file_sink.h>
#include <spdlog/sinks/stdout_color_sinks.h>
#include <spdlog/spdlog.h>
#include <algorithm>
#include <cstdio>
#include <map>
#include <memory>
#include <random>
#include <string>
#include <utility>
#include <vector>
#include "bpt/bpt.hpp"
#include "bpt/buffer_pool_manager.h"
#include "bpt/config.h"
#include "bpt/disk_manager.h"
namespace bpt_advanced_test {
/**
 * Fixed-size character buffer used as a B+ tree key in the tests.
 *
 * All `length` bytes take part in comparisons (there is no early stop at a
 * '\0'). The type stays an aggregate: no constructors are declared.
 */
template <size_t length>
class FixLengthString {
 public:
  char data[length];

  /// Lexicographic "less than" over all `length` characters.
  bool operator<(const FixLengthString<length> &that) const {
    size_t pos = 0;
    // Skip the common prefix; the first differing character decides.
    while (pos < length && data[pos] == that.data[pos]) ++pos;
    return pos < length && data[pos] < that.data[pos];
  }

  /// True when every one of the `length` characters matches.
  bool operator==(const FixLengthString<length> &that) const {
    size_t pos = 0;
    while (pos < length && data[pos] == that.data[pos]) ++pos;
    return pos == length;
  }
};
}  // namespace bpt_advanced_test
/**
 * Stress test for the B+ tree with fixed-length string keys.
 *
 * Phase 1: insert max_keys random keys, then remove keys_num_to_remove of them
 * one at a time. After every removal the tree is checked against a reference
 * vector: Size(), a point lookup of every remaining key, and a full in-order
 * iteration starting at the smallest remaining key.
 * Phase 2 and 3: a fresh DiskManager / BufferPoolManager / BPlusTreeIndexer is
 * constructed over the same file name and the lookups (and, in phase 3, the
 * iteration) are checked again.
 *
 * The disk manager and buffer pool manager are owned by std::unique_ptr so a
 * fatal ASSERT_* (which returns from the test body) cannot leak them. bpm is
 * declared after dm, so it is destroyed first.
 */
TEST(STRING, huge_size_1) {
  const unsigned int RndSeed = testing::GTEST_FLAG(random_seed);
  std::mt19937 rnd(RndSeed);
  const size_t str_len = 10;
  typedef bpt_advanced_test::FixLengthString<str_len> KeyType;
  // sizeof yields a size_t, for which %zu is the matching conversion.
  fprintf(stderr, "sizeof(std::pair<KeyType, default_numeric_index_t>)=%zu\n",
          sizeof(std::pair<KeyType, default_numeric_index_t>));
  const std::string db_file_name = "/tmp/bpt17.db";
  remove(db_file_name.c_str());
  std::vector<std::pair<KeyType, int>> entries;
  const int max_keys = 1000;
  const int keys_num_to_remove = 990;
  for (int i = 1; i <= max_keys; i++) {
    KeyType key;
    for (size_t j = 0; j < str_len; j++) key.data[j] = 'a' + rnd() % 26;
    // Terminate the buffer so it can be printed with %s.
    key.data[str_len - 1] = '\0';
    entries.push_back(std::make_pair(key, i));
  }
  std::shuffle(entries.begin(), entries.end(), rnd);
  fprintf(stderr, "The entries are:\n");
  for (size_t i = 0; i < entries.size(); i++) {
    fprintf(stderr, "key[%zu]=%s value[%zu]=%d\n", i, entries[i].first.data, i, entries[i].second);
  }
  auto dm = std::make_unique<DiskManager>(db_file_name.c_str());
  auto bpm = std::make_unique<BufferPoolManager>(20, 3, dm.get());
  {
    BPlusTreeIndexer<KeyType, std::less<KeyType>> bpt(bpm.get());
    for (auto &entry : entries) {
      bpt.Put(entry.first, entry.second);
    }
    for (int round = 1; round <= keys_num_to_remove; round++) {
      int id = rnd() % entries.size();
      fprintf(stderr, "removing key[%d]=%s value[%d]=%d\n", id, entries[id].first.data, id, entries[id].second);
      bpt.Remove(entries[id].first);
      entries.erase(entries.begin() + id);
      ASSERT_EQ(bpt.Size(), entries.size());
      for (size_t j = 0; j < entries.size(); j++) {
        ASSERT_EQ(bpt.Get(entries[j].first), entries[j].second);
      }
      {
        // checking iteration
        // Sorting here also changes which element a given `id` selects in the
        // following rounds, so it must stay inside the loop.
        std::sort(entries.begin(), entries.end());
        auto it_std = entries.begin();
        auto it_bpt = bpt.lower_bound_const(entries[0].first);
        for (size_t k = 0; k < entries.size(); k++) {
          fprintf(stderr, "i=%zu checking key[%zu]=%s value[%zu]=%d\n", k, k, it_std->first.data, k, it_std->second);
          ASSERT_TRUE(!(it_bpt == bpt.end_const()));
          ASSERT_EQ(it_bpt.GetKey(), it_std->first);
          ASSERT_EQ(it_bpt.GetValue(), it_std->second);
          ++it_bpt;
          ++it_std;
        }
        ASSERT_TRUE(it_bpt == bpt.end_const());
        ASSERT_EQ(bpt.Size(), entries.size());
      }
    }
    ASSERT_EQ(bpt.Size(), max_keys - keys_num_to_remove);
    for (size_t i = 0; i < entries.size(); i++) {
      ASSERT_EQ(bpt.Get(entries[i].first), entries[i].second);
    }
  }
  // Tear down in the same order as the original delete calls (pool first,
  // then disk manager) before opening the file again.
  bpm.reset();
  dm.reset();
  dm = std::make_unique<DiskManager>(db_file_name.c_str());
  bpm = std::make_unique<BufferPoolManager>(20, 3, dm.get());
  {
    BPlusTreeIndexer<KeyType, std::less<KeyType>> bpt(bpm.get());
    for (size_t i = 0; i < entries.size(); i++) {
      ASSERT_EQ(bpt.Get(entries[i].first), entries[i].second);
    }
  }
  bpm.reset();
  dm.reset();
  dm = std::make_unique<DiskManager>(db_file_name.c_str());
  bpm = std::make_unique<BufferPoolManager>(20, 3, dm.get());
  {
    BPlusTreeIndexer<KeyType, std::less<KeyType>> bpt(bpm.get());
    ASSERT_EQ(bpt.Size(), entries.size());
    for (size_t i = 0; i < entries.size(); i++) {
      ASSERT_EQ(bpt.Get(entries[i].first), entries[i].second);
    }
    std::sort(entries.begin(), entries.end());
    for (size_t i = 0; i < entries.size(); i++) {
      ASSERT_EQ(bpt.Get(entries[i].first), entries[i].second);
    }
    auto it_std = entries.begin();
    auto it_bpt = bpt.lower_bound_const(entries[0].first);
    for (size_t i = 0; i < entries.size(); i++) {
      fprintf(stderr, "i=%zu checking key[%zu]=%s value[%zu]=%d\n", i, i, it_std->first.data, i, it_std->second);
      ASSERT_TRUE(!(it_bpt == bpt.end_const()));
      ASSERT_EQ(it_bpt.GetKey(), it_std->first);
      ASSERT_EQ(it_bpt.GetValue(), it_std->second);
      ++it_bpt;
      ++it_std;
    }
    ASSERT_TRUE(it_bpt == bpt.end_const());
    ASSERT_EQ(bpt.Size(), entries.size());
  }
  // The tree above is already destroyed; release the pool before the disk
  // manager it was constructed with.
  bpm.reset();
  dm.reset();
}
/**
 * Randomised insert/remove test for the B+ tree with long long keys.
 *
 * A std::map is the reference model. After the initial bulk insert, each of
 * keys_num_to_remove rounds either removes a random existing key or puts a
 * random key/value pair (chosen by a coin flip), then checks Size(), a point
 * lookup of every key, and a full in-order iteration against the map.
 *
 * The disk manager and buffer pool manager are owned by std::unique_ptr; the
 * previous raw pointers were never deleted in this test. bpm is declared after
 * dm, so it is destroyed first.
 */
TEST(LONGLONG, huge_size_1) {
  const unsigned int RndSeed = testing::GTEST_FLAG(random_seed);
  std::mt19937 rnd(RndSeed);
  typedef long long KeyType;
  const std::string db_file_name = "/tmp/bpt18.db";
  remove(db_file_name.c_str());
  std::map<KeyType, int> entries;
  const int max_keys = 10000;
  const int keys_num_to_remove = 9900;
  for (int i = 1; i <= max_keys; i++) {
    KeyType key = rnd();
    entries[key] = i;
  }
  auto dm = std::make_unique<DiskManager>(db_file_name.c_str());
  auto bpm = std::make_unique<BufferPoolManager>(20, 3, dm.get());
  {
    BPlusTreeIndexer<KeyType, std::less<KeyType>> bpt(bpm.get());
    for (auto &entry : entries) {
      bpt.Put(entry.first, entry.second);
    }
    for (int round = 1; round <= keys_num_to_remove; round++) {
      if (rnd() % 2 == 0) {
        // Remove the id-th smallest key currently in the model.
        int id = rnd() % entries.size();
        auto it = entries.begin();
        for (int j = 0; j < id; j++) ++it;
        fprintf(stderr, "removing key=%lld value=%d\n", it->first, it->second);
        bpt.Remove(it->first);
        entries.erase(it);
      } else {
        // Put
        KeyType key = rnd();
        int value = rnd();
        fprintf(stderr, "inserting key=%lld value=%d\n", key, value);
        bpt.Put(key, value);
        entries[key] = value;
      }
      ASSERT_EQ(bpt.Size(), entries.size());
      for (auto &entry : entries) {
        ASSERT_EQ(bpt.Get(entry.first), entry.second);
      }
      {
        // checking iteration
        auto it_std = entries.begin();
        auto it_bpt = bpt.lower_bound_const(entries.begin()->first);
        for (size_t k = 0; k < entries.size(); k++) {
          fprintf(stderr, "i=%zu checking key=%lld value=%d\n", k, it_std->first, it_std->second);
          ASSERT_TRUE(!(it_bpt == bpt.end_const()));
          ASSERT_EQ(it_bpt.GetKey(), it_std->first);
          ASSERT_EQ(it_bpt.GetValue(), it_std->second);
          ++it_bpt;
          ++it_std;
        }
        ASSERT_TRUE(it_bpt == bpt.end_const());
        ASSERT_EQ(bpt.Size(), entries.size());
      }
    }
    ASSERT_EQ(bpt.Size(), entries.size());
    for (auto &entry : entries) {
      ASSERT_EQ(bpt.Get(entry.first), entry.second);
    }
  }
  // The tree is destroyed at this point; release the pool before the disk
  // manager it was constructed with, matching the STRING test's teardown.
  bpm.reset();
  dm.reset();
}

View File

@ -777,7 +777,7 @@ TEST(RemoveTest, RM_1) {
TEST(RemoveTest, RM_2) {
const unsigned int RndSeed = testing::GTEST_FLAG(random_seed);
std::mt19937 rnd(1);
std::mt19937 rnd(RndSeed);
const int str_len = 800;
typedef bpt_basic_test::FixLengthString<str_len> KeyType;
fprintf(stderr, "sizeof(std::pair<KeyType, default_numeric_index_t>)=%lu\n",

64
test/t1_mk.cpp Normal file
View File

@ -0,0 +1,64 @@
#include <bits/stdc++.h>
using namespace std;
// Seed drawn once at start-up from std::random_device; main() prints and logs
// it so the generated data set can be identified later.
const unsigned int RndSeed = std::random_device()();
// Single shared generator; every random decision in this file draws from it.
std::mt19937 rnd(RndSeed);
// Returns the next generator output reduced modulo n.
int rnd_less(int n) {
  const auto draw = rnd();
  return draw % n;
}
// Capacity of the parent array below.
const int kMaxN = 5e4 + 10;
// fa[v] is the parent of v; a node with fa[v] == v is a root.
int fa[kMaxN];
// Returns the root of the chain containing u and re-points every node visited
// on the way directly at that root (iterative, no recursion).
// NOTE(review): nothing in main() below calls this helper - confirm whether it
// is still needed.
inline int ff(int u) {
  int root = u;
  while (fa[root] != root) root = fa[root];
  for (int node = u; node != root;) {
    const int parent = fa[node];
    fa[node] = root;
    node = parent;
  }
  return root;
}
// Random test-data generator.
//
// Writes to stdout an operation count n followed by n lines, each one of
//   insert <key> <val>   (chosen when rnd() % 10 is 0..4)
//   delete <key> <val>   (5..6)
//   find <key>           (7..9)
// Keys come from a fixed pool of "#<number>#" strings. A per-key sorted list of
// the values inserted so far is kept so that roughly half of the deletes that
// target a key with values remove one that really exists.
// The seed is echoed on stderr and appended to GeneratorSeed.txt.
// The command-line arguments are not used.
int main(int argc, char *argv[]) {
  cerr << "[ Data Generator: Seed = " << RndSeed << " ]" << endl;
  // The seed log is best-effort: if the file cannot be opened, keep generating
  // instead of passing a null FILE* to fprintf/fclose.
  if (FILE *fout = fopen("GeneratorSeed.txt", "a")) {
    fprintf(fout, "Seed = %u\n", RndSeed);
    fclose(fout);
  } else {
    cerr << "[ Data Generator: cannot open GeneratorSeed.txt, seed not logged ]" << endl;
  }
  // ======================================
  int n = 1000;
  int total_keys = 300;
  // A set drops duplicate draws, so the pool may hold fewer than total_keys keys.
  set<string> keys_set;
  for (int i = 0; i < total_keys; i++) {
    string key = "#" + to_string(rnd_less(1000000)) + "#";
    keys_set.insert(key);
  }
  const vector<string> keys_vec(keys_set.begin(), keys_set.end());
  // key -> sorted, duplicate-free values currently stored under that key.
  // A key is only present while it has at least one value.
  unordered_map<string, vector<int>> mp;
  cout << n << "\n";
  for (int i = 0; i < n; i++) {
    const int tmp = rnd() % 10;
    if (tmp <= 4) {
      const string &key = keys_vec[rnd_less(keys_vec.size())];
      const int val = rnd_less(1000000);
      cout << "insert " << key << " " << val << "\n";
      // Insert into mp[key], keeping it sorted and free of duplicates.
      vector<int> &vals = mp[key];
      auto it = lower_bound(vals.begin(), vals.end(), val);
      if (it == vals.end() || *it != val) vals.insert(it, val);
    } else if (tmp <= 6) {
      const string &key = keys_vec[rnd_less(keys_vec.size())];
      int val = rnd_less(1000000);
      // The coin is always flipped, so the sequence of generator draws does not
      // depend on whether the key currently has values.
      const bool pick_existing = rnd() % 2 == 0;
      auto found = mp.find(key);
      if (pick_existing && found != mp.end() && !found->second.empty()) {
        // Pick a deletion that actually hits a stored value.
        val = found->second[rnd_less(found->second.size())];
      }
      cout << "delete " << key << " " << val << "\n";
      // Remove val from the key's list, dropping the key once the list is empty.
      if (found != mp.end()) {
        vector<int> &vals = found->second;
        auto it = lower_bound(vals.begin(), vals.end(), val);
        if (it != vals.end() && *it == val) vals.erase(it);
        if (vals.empty()) mp.erase(found);
      }
    } else {
      const string &key = keys_vec[rnd_less(keys_vec.size())];
      cout << "find " << key << "\n";
    }
  }
  return 0;
}

View File

@ -1,4 +1,4 @@
// 此程序仅用于对拍
// 此程序仅用于对拍,并测试是否存在哈希冲突
#include <cstdint>
#include <cstdio>
#include <fstream>
@ -6,7 +6,6 @@
#include <set>
#include <string>
#include <unordered_map>
#include "bpt/disk_manager.h"
typedef uint64_t hash_t;
inline hash_t Hash(std::string str) noexcept {
constexpr static char salt1[10] = "mL;]-=eT";