From 36a615aaad4f586d2a13bebab36465b7398dc08d Mon Sep 17 00:00:00 2001 From: ZhuangYumin Date: Thu, 25 Apr 2024 10:10:49 +0000 Subject: [PATCH] first version of fast lruk --- CMakeLists.txt | 1 + bpt/CMakeLists.txt | 1 + bpt/include/bpt/config.h | 9 ++ bpt/include/bpt/disk_manager.h | 46 +++++++++ bpt/include/bpt/replacer.h | 72 +++++++++++++ bpt/src/disk_manager.cpp | 103 +++++++++++++++++++ bpt/src/replacer.cpp | 157 +++++++++++++++++++++++++++++ test/CMakeLists.txt | 6 +- test/oj_test_interface_for_bpt.cpp | 2 +- test/replacer_test.cpp | 104 +++++++++++++++++++ 10 files changed, 498 insertions(+), 3 deletions(-) create mode 100644 bpt/CMakeLists.txt create mode 100644 bpt/include/bpt/config.h create mode 100644 bpt/include/bpt/disk_manager.h create mode 100644 bpt/include/bpt/replacer.h create mode 100644 bpt/src/disk_manager.cpp create mode 100644 bpt/src/replacer.cpp create mode 100644 test/replacer_test.cpp diff --git a/CMakeLists.txt b/CMakeLists.txt index 6e013ad..6b31d76 100644 --- a/CMakeLists.txt +++ b/CMakeLists.txt @@ -87,5 +87,6 @@ include_directories(${CMAKE_SOURCE_DIR}/stlite) include(CTest) enable_testing() +add_subdirectory(bpt) add_subdirectory(test) add_subdirectory(src) \ No newline at end of file diff --git a/bpt/CMakeLists.txt b/bpt/CMakeLists.txt new file mode 100644 index 0000000..4bf7665 --- /dev/null +++ b/bpt/CMakeLists.txt @@ -0,0 +1 @@ +add_library(bpt STATIC src/disk_manager.cpp src/replacer.cpp) \ No newline at end of file diff --git a/bpt/include/bpt/config.h b/bpt/include/bpt/config.h new file mode 100644 index 0000000..bd95108 --- /dev/null +++ b/bpt/include/bpt/config.h @@ -0,0 +1,9 @@ +#ifndef CONFIG_H +#define CONFIG_H +#include +extern const size_t kPageSize; +typedef unsigned int default_numeric_index_t; +typedef default_numeric_index_t page_id_t; +typedef default_numeric_index_t block_id_t; +typedef default_numeric_index_t frame_id_t; +#endif \ No newline at end of file diff --git a/bpt/include/bpt/disk_manager.h 
b/bpt/include/bpt/disk_manager.h new file mode 100644 index 0000000..57bff04 --- /dev/null +++ b/bpt/include/bpt/disk_manager.h @@ -0,0 +1,46 @@ +#ifndef DISK_MANAGER_H +#define DISK_MANAGER_H +#include +#include +#include "config.h" +extern const size_t kPageSize; +class DiskManager { + /** + * The Data Structure on Disk: + * [Internal Page] [Page 1] [Page 2] ..... + * In Internal Page, the first meta_data_size bytes are used to store + * metadata(first_empty_page_id, current_total_page_count, current_none_empty_page_count), the rest are allocated to + * raw_data_memory. + * When a page is Deallocated, the first sizeof(page_id_t) bytes are used to store the next empty page + * id, then update first_empty_page_id, just like a list. To avoid unnecessary cache, use C style file operation + * instead of fstream. Note that the page_id is the offset of the page in the file, as the first page is internal, + * thus page_id is 1-based. In the list of empty pages, if there is no next empty page, the value is 0(the same + * for first_empty_page_id). 
+ */ + public: + explicit DiskManager(const std::string &file_path_); + ~DiskManager(); + char *RawDataMemory(); + size_t RawDatMemorySize(); + void FlushInternalPage(); + void Close(); + void ReadPage(page_id_t page_id, char *page_data_ptr); + void WritePage(page_id_t page_id, const char *page_data_ptr); // in fact, the page_id is the offset + bool CurrentFileIsNew(); + page_id_t AllocNewEmptyPageId(); + void DeallocatePage(page_id_t page_id); + size_t CurrentTotalPageCount(); + size_t CurrentNoneEmptyPageCount(); + + private: + std::string file_path; + page_id_t first_empty_page_id; + size_t current_total_page_count; + size_t current_none_empty_page_count; + static const size_t meta_data_size = sizeof(page_id_t) + sizeof(size_t) + sizeof(size_t); + char *raw_data_memory; + FILE *fp; + bool is_new; + char *page_buf; +}; +#endif \ No newline at end of file diff --git a/bpt/include/bpt/replacer.h b/bpt/include/bpt/replacer.h new file mode 100644 index 0000000..9bbca16 --- /dev/null +++ b/bpt/include/bpt/replacer.h @@ -0,0 +1,72 @@ +#ifndef REPLACER_H +#define REPLACER_H +#include +#include +#include "bpt/config.h" +class LRUKReplacer { + public: + LRUKReplacer() = delete; + explicit LRUKReplacer(size_t max_frame_count, size_t k_value); + ~LRUKReplacer(); + bool TryEvictLeastImportant(frame_id_t &frame_id); + void RecordAccess(frame_id_t frame_id); + void SetEvictable(frame_id_t frame_id, bool evitable); + bool TryEvictExactFrame(frame_id_t frame_id); + LRUKReplacer &operator=(const LRUKReplacer &) = delete; + size_t GetCurrentEvitableCount(); + + private: + struct LRUChainNodeType { // for not has k visit + frame_id_t frame_id; + LRUChainNodeType *prev, *next; + LRUChainNodeType() = delete; + explicit LRUChainNodeType(frame_id_t frame_id, LRUChainNodeType *prev, LRUChainNodeType *next) + : frame_id(frame_id), prev(prev), next(next) {} + }; + struct MainChainNodeType { // for has k visit + frame_id_t frame_id; + size_t time_stamp; + MainChainNodeType *prev, *next; +
MainChainNodeType *next_self_record; + MainChainNodeType() = delete; + explicit MainChainNodeType(frame_id_t frame_id, size_t time_stamp, MainChainNodeType *prev, MainChainNodeType *next, + MainChainNodeType *next_self_record) + : frame_id(frame_id), time_stamp(time_stamp), prev(prev), next(next), next_self_record(next_self_record) {} + }; + template <typename ListNodeType> + inline void RemoveFromList(ListNodeType *node) { + if (node->prev != nullptr) { + node->prev->next = node->next; + } + if (node->next != nullptr) { + node->next->prev = node->prev; + } + } + template <typename ListNodeType> + inline void InsertAt(ListNodeType *node, ListNodeType *prev, ListNodeType *next) { + node->prev = prev; + node->next = next; + if (prev != nullptr) prev->next = node; + if (next != nullptr) next->prev = node; + } + struct LRUKRecord { + LRUKRecord() = default; + bool evitable; + size_t visit_count; + bool active; + MainChainNodeType *head_node_in_main_chain, *tail_node_in_main_chain; + LRUChainNodeType *node_in_LRU_chain; + }; + void RemoveWholeFrameFromLRUKChain(MainChainNodeType *first_occurrence_ptr); // remove and delete nodes + MainChainNodeType *AddRecordToMainChain(frame_id_t frame_it, size_t time_stamp, + MainChainNodeType *last_node_in_main_chain); + LRUChainNodeType *LRU_chain_head_guard, *LRU_chain_tail_guard; + MainChainNodeType *LRUK_chain_head_guard, *LRUK_chain_tail_guard; + size_t current_timestamp_{0}; + size_t current_evitable_count_{0}; + size_t max_frame_count; + size_t k_value; + std::mutex latch; + LRUKRecord *hash_for_record; +}; +#endif \ No newline at end of file diff --git a/bpt/src/disk_manager.cpp b/bpt/src/disk_manager.cpp new file mode 100644 index 0000000..6669c63 --- /dev/null +++ b/bpt/src/disk_manager.cpp @@ -0,0 +1,103 @@ +#include "bpt/disk_manager.h" +#include +#include +#include +DiskManager::DiskManager(const std::string &file_path_) + : file_path(file_path_), + first_empty_page_id(0), + current_total_page_count(0), + current_none_empty_page_count(0), + raw_data_memory(nullptr), +
fp(nullptr) { + fp = fopen(file_path.c_str(), "r+b"); + if (fp == nullptr) { + // File doesn't exist, create a new one + fp = fopen(file_path.c_str(), "w+b"); + // Initialize internal page + first_empty_page_id = 0; + current_total_page_count = 0; + current_none_empty_page_count = 0; + raw_data_memory = new char[kPageSize - meta_data_size]; + memset(raw_data_memory, 0, kPageSize - meta_data_size); + FlushInternalPage(); + is_new = true; + } else { + // File exists, read metadata from internal page + fseek(fp, 0, SEEK_SET); + fread(&first_empty_page_id, sizeof(page_id_t), 1, fp); + fread(&current_total_page_count, sizeof(size_t), 1, fp); + fread(&current_none_empty_page_count, sizeof(size_t), 1, fp); + raw_data_memory = new char[kPageSize - meta_data_size]; + fread(raw_data_memory, kPageSize - meta_data_size, 1, fp); + is_new = false; + } + page_buf = new char[kPageSize]; +} + +DiskManager::~DiskManager() { + Close(); + delete[] raw_data_memory; + delete[] page_buf; +} + +char *DiskManager::RawDataMemory() { return raw_data_memory; } + +size_t DiskManager::RawDatMemorySize() { return kPageSize - meta_data_size; } + +void DiskManager::FlushInternalPage() { + fseek(fp, 0, SEEK_SET); + fwrite(&first_empty_page_id, sizeof(page_id_t), 1, fp); + fwrite(&current_total_page_count, sizeof(size_t), 1, fp); + fwrite(&current_none_empty_page_count, sizeof(size_t), 1, fp); + fwrite(raw_data_memory, kPageSize - meta_data_size, 1, fp); + fflush(fp); +} + +void DiskManager::Close() { + if (fp != nullptr) { + FlushInternalPage(); + fclose(fp); + fp = nullptr; + } +} + +void DiskManager::ReadPage(page_id_t page_id, char *page_data_ptr) { + fseek(fp, page_id * kPageSize, SEEK_SET); + fread(page_data_ptr, kPageSize, 1, fp); +} + +void DiskManager::WritePage(page_id_t page_id, const char *page_data_ptr) { + fseek(fp, page_id * kPageSize, SEEK_SET); + fwrite(page_data_ptr, kPageSize, 1, fp); +} + +bool DiskManager::CurrentFileIsNew() { return is_new; } + +page_id_t DiskManager::AllocNewEmptyPageId() { +
page_id_t new_page_id; + if (first_empty_page_id == 0) { + // No empty page available, append a new page + current_total_page_count++; + new_page_id = current_total_page_count; + fseek(fp, 0, SEEK_END); + fwrite(page_buf, kPageSize, 1, fp); + } else { + new_page_id = first_empty_page_id; + ReadPage(new_page_id, page_buf); + memcpy(&first_empty_page_id, page_buf, sizeof(page_id_t)); + } + current_none_empty_page_count++; + return new_page_id; +} + +void DiskManager::DeallocatePage(page_id_t page_id) { + // Add the deallocated page to the head of the empty list + memcpy(page_buf, &first_empty_page_id, sizeof(page_id_t)); + WritePage(page_id, page_buf); + first_empty_page_id = page_id; + current_none_empty_page_count--; +} + +size_t DiskManager::CurrentTotalPageCount() { return current_total_page_count; } + +size_t DiskManager::CurrentNoneEmptyPageCount() { return current_none_empty_page_count; } \ No newline at end of file diff --git a/bpt/src/replacer.cpp b/bpt/src/replacer.cpp new file mode 100644 index 0000000..9b823a0 --- /dev/null +++ b/bpt/src/replacer.cpp @@ -0,0 +1,157 @@ +#include "bpt/replacer.h" +#include +LRUKReplacer::LRUKReplacer(size_t max_frame_count, size_t k_value) + : max_frame_count(max_frame_count), k_value(k_value) { + hash_for_record = new LRUKRecord[max_frame_count]; + for (size_t i = 0; i < max_frame_count; i++) { + hash_for_record[i].active = false; + } + LRU_chain_head_guard = new LRUChainNodeType(-1, nullptr, nullptr); + LRU_chain_tail_guard = new LRUChainNodeType(-1, nullptr, nullptr); + LRU_chain_head_guard->next = LRU_chain_tail_guard; + LRU_chain_tail_guard->prev = LRU_chain_head_guard; + LRUK_chain_head_guard = new MainChainNodeType(-1, 0, nullptr, nullptr, nullptr); + LRUK_chain_tail_guard = new MainChainNodeType(-1, 0, nullptr, nullptr, nullptr); + LRUK_chain_head_guard->next = LRUK_chain_tail_guard; + LRUK_chain_tail_guard->prev = LRUK_chain_head_guard; +} + +LRUKReplacer::~LRUKReplacer() { + delete[] hash_for_record; + 
LRUChainNodeType *ptr = LRU_chain_head_guard->next; + while (ptr != LRU_chain_tail_guard) { + LRUChainNodeType *tmp = ptr; + ptr = ptr->next; + delete tmp; + } + MainChainNodeType *ptr2 = LRUK_chain_head_guard->next; + while (ptr2 != LRUK_chain_tail_guard) { + MainChainNodeType *tmp = ptr2; + ptr2 = ptr2->next; + delete tmp; + } + delete LRU_chain_head_guard; + delete LRU_chain_tail_guard; + delete LRUK_chain_head_guard; + delete LRUK_chain_tail_guard; +} + +void LRUKReplacer::SetEvictable(frame_id_t frame_id, bool evitable) { + std::lock_guard guard(latch); + if (!hash_for_record[frame_id].active) { + return; + } + if (hash_for_record[frame_id].evitable == evitable) { + return; + } + hash_for_record[frame_id].evitable = evitable; + if (evitable) { + current_evitable_count_++; + } else { + current_evitable_count_--; + } +} + +void LRUKReplacer::RemoveWholeFrameFromLRUKChain(MainChainNodeType *first_occurrence_ptr) { + if (first_occurrence_ptr == nullptr) return; + MainChainNodeType *tmp; + while (first_occurrence_ptr != nullptr) { + tmp = first_occurrence_ptr; + RemoveFromList(tmp); + first_occurrence_ptr = first_occurrence_ptr->next_self_record; + delete tmp; + } +} + +LRUKReplacer::MainChainNodeType *LRUKReplacer::AddRecordToMainChain(frame_id_t frame_id, size_t time_stamp, + MainChainNodeType *last_node_in_main_chain) { + MainChainNodeType *new_node = new LRUKReplacer::MainChainNodeType(frame_id, time_stamp, nullptr, nullptr, nullptr); + if (last_node_in_main_chain != nullptr) last_node_in_main_chain->next_self_record = new_node; + InsertAt(new_node, LRUK_chain_tail_guard->prev, LRUK_chain_tail_guard); + return new_node; +} + +bool LRUKReplacer::TryEvictExactFrame(frame_id_t frame_id) { + std::lock_guard guard(latch); + if (!hash_for_record[frame_id].active) { + return false; + } + if (!hash_for_record[frame_id].evitable) { + return false; + } + LRUChainNodeType *node = hash_for_record[frame_id].node_in_LRU_chain; + if (node != nullptr) RemoveFromList(node); + 
delete node; + hash_for_record[frame_id].node_in_LRU_chain = nullptr; + RemoveWholeFrameFromLRUKChain(hash_for_record[frame_id].head_node_in_main_chain); + hash_for_record[frame_id].active = false; + current_evitable_count_--; + return true; +} + +bool LRUKReplacer::TryEvictLeastImportant(frame_id_t &frame_id) { + latch.lock(); + if (current_evitable_count_ == 0) { + latch.unlock(); + return false; + } + LRUChainNodeType *node = LRU_chain_head_guard->next; + while (node != LRU_chain_tail_guard) { + frame_id = node->frame_id; + if (hash_for_record[frame_id].evitable) { + latch.unlock(); + return TryEvictExactFrame(frame_id); + } + node = node->next; + } + MainChainNodeType *main_chain_node = LRUK_chain_head_guard->next; + while (main_chain_node != LRUK_chain_tail_guard) { + frame_id = main_chain_node->frame_id; + if (hash_for_record[frame_id].evitable) { + latch.unlock(); + return TryEvictExactFrame(frame_id); + } + main_chain_node = main_chain_node->next; + } + latch.unlock(); + return false; +} + +void LRUKReplacer::RecordAccess(frame_id_t frame_id) { + std::lock_guard guard(latch); + current_timestamp_++; + if (!hash_for_record[frame_id].active) { + hash_for_record[frame_id].active = true; + hash_for_record[frame_id].evitable = false; + hash_for_record[frame_id].visit_count = 1; + hash_for_record[frame_id].head_node_in_main_chain = hash_for_record[frame_id].tail_node_in_main_chain = + AddRecordToMainChain(frame_id, current_timestamp_, nullptr); + hash_for_record[frame_id].node_in_LRU_chain = + new LRUChainNodeType(frame_id, LRU_chain_tail_guard->prev, LRU_chain_tail_guard); + InsertAt(hash_for_record[frame_id].node_in_LRU_chain, LRU_chain_tail_guard->prev, LRU_chain_tail_guard); + } else { + hash_for_record[frame_id].visit_count++; + MainChainNodeType *last_occurrence_ptr = hash_for_record[frame_id].tail_node_in_main_chain; + MainChainNodeType *new_node_in_main_chain_ptr = + AddRecordToMainChain(frame_id, current_timestamp_, last_occurrence_ptr); + 
hash_for_record[frame_id].tail_node_in_main_chain = new_node_in_main_chain_ptr; + if (hash_for_record[frame_id].node_in_LRU_chain != nullptr) { + RemoveFromList(hash_for_record[frame_id].node_in_LRU_chain); + delete hash_for_record[frame_id].node_in_LRU_chain; + hash_for_record[frame_id].node_in_LRU_chain = nullptr; + } + if (hash_for_record[frame_id].visit_count < k_value) { + hash_for_record[frame_id].node_in_LRU_chain = + new LRUChainNodeType(frame_id, LRU_chain_tail_guard->prev, LRU_chain_tail_guard); + InsertAt(hash_for_record[frame_id].node_in_LRU_chain, LRU_chain_tail_guard->prev, LRU_chain_tail_guard); + } + if (hash_for_record[frame_id].visit_count > k_value) { + MainChainNodeType *first_occurrence_ptr = hash_for_record[frame_id].head_node_in_main_chain; + RemoveFromList(first_occurrence_ptr); + hash_for_record[frame_id].head_node_in_main_chain = first_occurrence_ptr->next_self_record; + delete first_occurrence_ptr; + } + } +} + +size_t LRUKReplacer::GetCurrentEvitableCount() { return current_evitable_count_; } \ No newline at end of file diff --git a/test/CMakeLists.txt b/test/CMakeLists.txt index fd79814..4bfa0d3 100644 --- a/test/CMakeLists.txt +++ b/test/CMakeLists.txt @@ -1,5 +1,7 @@ if(OJ_TEST_BPT) add_executable(code oj_test_interface_for_bpt.cpp) - target_link_libraries(code argparse) + target_link_libraries(code bpt) set_target_properties(code PROPERTIES RUNTIME_OUTPUT_DIRECTORY ${CMAKE_BINARY_DIR}) -endif() \ No newline at end of file +endif() +add_executable(replacer_test replacer_test.cpp) +target_link_libraries(replacer_test bpt GTest::gtest_main) \ No newline at end of file diff --git a/test/oj_test_interface_for_bpt.cpp b/test/oj_test_interface_for_bpt.cpp index 662973d..4370393 100644 --- a/test/oj_test_interface_for_bpt.cpp +++ b/test/oj_test_interface_for_bpt.cpp @@ -1,4 +1,4 @@ -#include +#include "bpt/disk_manager.h" int main() { return 0; diff --git a/test/replacer_test.cpp b/test/replacer_test.cpp new file mode 100644 index 
0000000..e000adb --- /dev/null +++ b/test/replacer_test.cpp @@ -0,0 +1,104 @@ +#include "bpt/replacer.h" +#include +#include "bpt/config.h" +// Demonstrate some basic assertions. +TEST(HelloTest, BasicAssertions) { + // Expect two strings not to be equal. + EXPECT_STRNE("hello", "world"); + // Expect equality. + EXPECT_EQ(7 * 6, 42); +} + +TEST(BasicTest, Basic1) { + LRUKReplacer replacer(6, 3); + replacer.RecordAccess(0); + replacer.RecordAccess(0); + replacer.RecordAccess(0); + frame_id_t frame_id; + ASSERT_EQ(replacer.TryEvictLeastImportant(frame_id), false); + replacer.RecordAccess(1); + replacer.SetEvictable(0, true); + replacer.SetEvictable(1, true); + ASSERT_EQ(replacer.TryEvictLeastImportant(frame_id), true); + ASSERT_EQ(frame_id, 1); +} + +TEST(BasicTest, CopiedFromBustubProject) { + LRUKReplacer lru_replacer(7, 2); + + // Scenario: add six elements to the replacer. We have [1,2,3,4,5]. Frame 6 is non-evictable. + lru_replacer.RecordAccess(1); + lru_replacer.RecordAccess(2); + lru_replacer.RecordAccess(3); + lru_replacer.RecordAccess(4); + lru_replacer.RecordAccess(5); + lru_replacer.RecordAccess(6); + lru_replacer.SetEvictable(1, true); + lru_replacer.SetEvictable(2, true); + lru_replacer.SetEvictable(3, true); + lru_replacer.SetEvictable(4, true); + lru_replacer.SetEvictable(5, true); + lru_replacer.SetEvictable(6, false); + ASSERT_EQ(5, lru_replacer.GetCurrentEvitableCount()); + + // Scenario: Insert access history for frame 1. Now frame 1 has two access histories. + // All other frames have max backward k-dist. The order of eviction is [2,3,4,5,1]. + lru_replacer.RecordAccess(1); + + // Scenario: Evict three pages from the replacer. Elements with max k-distance should be popped + // first based on LRU. 
+ frame_id_t value; + lru_replacer.TryEvictLeastImportant(value); + ASSERT_EQ(2, value); + lru_replacer.TryEvictLeastImportant(value); + ASSERT_EQ(3, value); + lru_replacer.TryEvictLeastImportant(value); + ASSERT_EQ(4, value); + ASSERT_EQ(2, lru_replacer.GetCurrentEvitableCount()); + + // Scenario: Now replacer has frames [5,1]. + // Insert new frames 3, 4, and update access history for 5. We should end with [3,1,5,4] + lru_replacer.RecordAccess(3); + lru_replacer.RecordAccess(4); + lru_replacer.RecordAccess(5); + lru_replacer.RecordAccess(4); + lru_replacer.SetEvictable(3, true); + lru_replacer.SetEvictable(4, true); + ASSERT_EQ(4, lru_replacer.GetCurrentEvitableCount()); + + // Scenario: continue looking for victims. We expect 3 to be evicted next. + lru_replacer.TryEvictLeastImportant(value); + ASSERT_EQ(3, value); + ASSERT_EQ(3, lru_replacer.GetCurrentEvitableCount()); + + // Set 6 to be evictable. 6 Should be evicted next since it has max backward k-dist. + lru_replacer.SetEvictable(6, true); + ASSERT_EQ(4, lru_replacer.GetCurrentEvitableCount()); + lru_replacer.TryEvictLeastImportant(value); + ASSERT_EQ(6, value); + ASSERT_EQ(3, lru_replacer.GetCurrentEvitableCount()); + + // Now we have [1,5,4]. Continue looking for victims. + lru_replacer.SetEvictable(1, false); + ASSERT_EQ(2, lru_replacer.GetCurrentEvitableCount()); + ASSERT_EQ(true, lru_replacer.TryEvictLeastImportant(value)); + ASSERT_EQ(5, value); + ASSERT_EQ(1, lru_replacer.GetCurrentEvitableCount()); + + // Update access history for 1. Now we have [4,1]. Next victim is 4. 
+ lru_replacer.RecordAccess(1); + lru_replacer.RecordAccess(1); + lru_replacer.SetEvictable(1, true); + ASSERT_EQ(2, lru_replacer.GetCurrentEvitableCount()); + ASSERT_EQ(true, lru_replacer.TryEvictLeastImportant(value)); + ASSERT_EQ(value, 4); + + ASSERT_EQ(1, lru_replacer.GetCurrentEvitableCount()); + lru_replacer.TryEvictLeastImportant(value); + ASSERT_EQ(value, 1); + ASSERT_EQ(0, lru_replacer.GetCurrentEvitableCount()); + + // This operation should not modify size + ASSERT_EQ(false, lru_replacer.TryEvictLeastImportant(value)); + ASSERT_EQ(0, lru_replacer.GetCurrentEvitableCount()); +} \ No newline at end of file