docs: optimize drivearray

This commit is contained in:
2023-12-05 15:34:22 +00:00
parent e76bdbd436
commit 68de72efb0
2 changed files with 167 additions and 151 deletions

View File

@ -1,181 +1,203 @@
#ifndef BPT_DriveArray_HPP
#define BPT_DriveArray_HPP
#include <fcntl.h>
#include <sys/mman.h>
#include <sys/stat.h>
#include <sys/types.h>
#include <unistd.h>
#include <algorithm>
#include <cstring>
#include <mutex>
#include <fstream>
#include <queue>
#include <stack>
#include <string>
template <class T, const int info_len = 2, const int kRefreshThreshold = 100>
#include <unordered_map>
template <class T, const int info_len = 2, const int kBufSize = 100>
class DriveArray {
private:
static const int kPageSize = 4096;
int file_descriptor = -1;
size_t file_length = 0;
void *virtual_mem;
std::string file_name;
const int sizeofT = sizeof(T);
const int raw_data_begin =
static const int kDataBiginOffset =
((info_len + 2) * sizeof(int) + kPageSize - 1) / kPageSize * kPageSize;
std::stack<int> free_mem;
int total_mem = 0;
unsigned int forced_refresh = 0;
void reallocate(bool include_resync = false) noexcept {
size_t length_needed =
raw_data_begin +
(sizeofT * total_mem + kPageSize - 1) / kPageSize * kPageSize;
if (include_resync)
length_needed += (free_mem.size() * sizeof(int) + kPageSize - 1) /
kPageSize * kPageSize;
if (file_length >= length_needed) return;
munmap(virtual_mem, file_length);
file_length = std::max(file_length * 2, length_needed);
ftruncate(file_descriptor, file_length);
virtual_mem = mmap(nullptr, file_length, PROT_READ | PROT_WRITE, MAP_SHARED,
file_descriptor, 0);
static const int sizeofT = sizeof(T);
struct DataType {
int next_vacant_data_index;
T val;
};
static const int kBlockSize =
(sizeof(DataType) + kPageSize - 1) / kPageSize * kPageSize;
static const int kDataPerBlock = kBlockSize / sizeof(DataType);
struct BlockType {
DataType data[kDataPerBlock];
};
char rest[kBlockSize - sizeof(BlockType)];
static_assert(kBlockSize % kPageSize == 0, "kBlockSize % kPageSize != 0");
std::string file_name;
int total_block_number = 0, first_vacant_data_index = 0;
/**
* DataIndex=(BlockIndex-1)*kDataPerBlock+InnnerIndex
*/
std::unordered_map<int, BlockType *> cache;
std::queue<int> vis_que;
void LoadCache(int block_index) {
BlockType *tmp = new BlockType;
fs.seekg(kDataBiginOffset + (block_index - 1) * kBlockSize, std::ios::beg);
fs.read(reinterpret_cast<char *>(tmp), sizeof(BlockType));
cache[block_index] = tmp;
vis_que.push(block_index);
}
void ReleaseOldestCache() {
int block_index = vis_que.front();
vis_que.pop();
fs.seekp(kDataBiginOffset + (block_index - 1) * kBlockSize, std::ios::beg);
fs.write(reinterpret_cast<char *>(cache[block_index]), sizeof(BlockType));
delete cache[block_index];
cache.erase(block_index);
}
BlockType *OrderBlock(int block_index) {
if (cache.find(block_index) != cache.end()) return cache[block_index];
if (cache.size() == kBufSize) ReleaseOldestCache();
LoadCache(block_index);
return cache[block_index];
}
int AppEndBlock() {
fs.seekp(0, std::ios::end);
BlockType tmp;
fs.write(reinterpret_cast<char *>(&tmp), sizeof(BlockType));
fs.write(rest, kBlockSize - sizeof(BlockType));
++total_block_number;
return total_block_number;
}
public:
std::fstream fs;
DriveArray() = default;
inline bool IsOpen() const noexcept { return file_descriptor >= 0; }
~DriveArray() {
if (file_descriptor >= 0) {
reallocate(true);
int stk_data_begin =
raw_data_begin +
(sizeofT * total_mem + kPageSize - 1) / kPageSize * kPageSize;
*((int *)(virtual_mem) + info_len) = total_mem;
*((int *)(virtual_mem) + info_len + 1) = (int)free_mem.size();
int *p = (int *)(virtual_mem + stk_data_begin);
while (!free_mem.empty()) {
*(p++) = free_mem.top();
free_mem.pop();
}
munmap(virtual_mem, file_length);
close(file_descriptor);
file_descriptor = -1;
}
}
bool operator=(const DriveArray &) = delete;
void ForceRefresh() noexcept {
munmap(virtual_mem, file_length);
virtual_mem = mmap(nullptr, file_length, PROT_READ | PROT_WRITE, MAP_SHARED,
file_descriptor, 0);
}
void *RawData() noexcept { return virtual_mem; }
void OpenFile(const std::string &file_name) {
if (file_name == "") return;
if (file_descriptor >= 0) {
munmap(virtual_mem, file_length);
close(file_descriptor);
file_descriptor = -1;
}
file_descriptor =
open(file_name.c_str(), O_CREAT | O_RDWR, S_IRUSR | S_IWUSR);
struct stat file_state;
fstat(file_descriptor, &file_state);
file_length = file_state.st_size;
if (file_length == 0) {
file_length = 1024 * 1024;
file_length =
std::max(file_length, ((info_len + 2) * sizeof(int) + kPageSize - 1) /
kPageSize * kPageSize);
ftruncate(file_descriptor, file_length);
}
virtual_mem = mmap(nullptr, file_length, PROT_READ | PROT_WRITE, MAP_SHARED,
file_descriptor, 0);
total_mem = *((int *)(virtual_mem) + info_len);
int free_mem_cnt = *((int *)(virtual_mem) + info_len + 1);
int stk_data_begin =
raw_data_begin +
(sizeofT * total_mem + kPageSize - 1) / kPageSize * kPageSize;
int *p = (int *)(virtual_mem + stk_data_begin);
for (int i = 0; i < free_mem_cnt; i++) {
free_mem.push(*(p++));
}
}
DriveArray(const DriveArray &) = delete;
DriveArray &operator=(const DriveArray &) = delete;
DriveArray(const std::string &file_name) : file_name(file_name) {
OpenFile(file_name);
}
void initialise(std::string FN = "") {
if (FN != "") file_name = FN;
if (file_descriptor >= 0) {
munmap(virtual_mem, file_length);
close(file_descriptor);
file_descriptor = -1;
inline bool IsOpen() const noexcept { return fs.is_open(); }
~DriveArray() { CloseFile(); }
void CloseFile() {
if (!fs.is_open()) return;
while (cache.size() > 0) ReleaseOldestCache();
fs.seekp(sizeof(int) * info_len, std::ios::beg);
fs.write(reinterpret_cast<char *>(&first_vacant_data_index), sizeof(int));
fs.write(reinterpret_cast<char *>(&total_block_number), sizeof(int));
fs.close();
file_name = "";
first_vacant_data_index = 0;
total_block_number = 0;
}
void OpenFile(const std::string &__file_name) {
if (fs.is_open()) CloseFile();
file_name = __file_name;
fs.open(file_name, std::ios::in | std::ios::out | std::ios::binary);
if (!fs.is_open()) {
fs.open(file_name, std::ios::out | std::ios::binary);
fs.seekp(0, std::ios::beg);
int tmp = 0;
total_block_number = 0;
first_vacant_data_index = 0;
for (int i = 0; i < kDataBiginOffset / sizeof(int); ++i) {
fs.write(reinterpret_cast<char *>(&tmp), sizeof(int));
}
fs.close();
fs.open(file_name, std::ios::in | std::ios::out | std::ios::binary);
}
file_descriptor =
open(file_name.c_str(), O_CREAT | O_RDWR, S_IRUSR | S_IWUSR);
file_length = 1024 * 1024;
file_length =
std::max(file_length, ((info_len + 2) * sizeof(int) + kPageSize - 1) /
kPageSize * kPageSize);
ftruncate(file_descriptor, file_length);
virtual_mem = mmap(nullptr, file_length, PROT_READ | PROT_WRITE, MAP_SHARED,
file_descriptor, 0);
for (int i = 0; i < info_len; i++) *((int *)(virtual_mem) + i) = 0;
fs.seekg(sizeof(int) * info_len, std::ios::beg);
fs.read(reinterpret_cast<char *>(&first_vacant_data_index), sizeof(int));
fs.read(reinterpret_cast<char *>(&total_block_number), sizeof(int));
}
void initialise(std::string FN = "") {
if (fs.is_open()) {
std::string name_bak = file_name;
CloseFile();
file_name = name_bak;
}
if (FN != "") file_name = FN;
if (file_name == "") return;
fs.open(file_name, std::ios::out | std::ios::binary);
fs.seekp(0, std::ios::beg);
int tmp = 0;
total_block_number = 0;
first_vacant_data_index = 0;
for (int i = 0; i < kDataBiginOffset / sizeof(int); ++i) {
fs.write(reinterpret_cast<char *>(&tmp), sizeof(int));
}
fs.close();
fs.open(file_name, std::ios::in | std::ios::out | std::ios::binary);
}
void get_info(int &tmp, int n) noexcept {
if (n > info_len) return;
tmp = *((int *)(virtual_mem) + n - 1);
if (++forced_refresh >= kRefreshThreshold) {
forced_refresh = 0;
ForceRefresh();
}
fs.seekg((n - 1) * sizeof(int), std::ios::beg);
fs.read(reinterpret_cast<char *>(&tmp), sizeof(int));
}
void write_info(int tmp, int n) noexcept {
if (n > info_len) return;
*((int *)(virtual_mem) + n - 1) = tmp;
if (++forced_refresh >= kRefreshThreshold) {
forced_refresh = 0;
ForceRefresh();
}
fs.seekp((n - 1) * sizeof(int), std::ios::beg);
fs.write(reinterpret_cast<char *>(&tmp), sizeof(int));
}
void LoadInfoTo(int *dest) {
fs.seekg(0, std::ios::beg);
fs.read(reinterpret_cast<char *>(dest), sizeof(int) * info_len);
}
void WriteInfoFrom(int *src) {
fs.seekp(0, std::ios::beg);
fs.write(reinterpret_cast<char *>(src), sizeof(int) * info_len);
}
int write(T &t) noexcept {
int index = -1;
if (!free_mem.empty()) {
index = free_mem.top();
free_mem.pop();
} else
index = ++total_mem;
update(t, index);
return index;
if (first_vacant_data_index == 0) {
int new_block_index = AppEndBlock();
int index = (new_block_index - 1) * kDataPerBlock + 1;
BlockType *blk_ptr = OrderBlock(new_block_index);
if (kDataPerBlock > 1)
first_vacant_data_index = index + 1;
else
first_vacant_data_index = 0;
for (int i = 1; i < kDataPerBlock - 1; i++)
blk_ptr->data[i].next_vacant_data_index = index + i + 1;
blk_ptr->data[kDataPerBlock - 1].next_vacant_data_index = 0;
blk_ptr->data[0].next_vacant_data_index = 0;
blk_ptr->data[0].val = t;
return index;
} else {
int block_index = (first_vacant_data_index - 1) / kDataPerBlock + 1;
int inner_index = (first_vacant_data_index - 1) % kDataPerBlock + 1;
BlockType *blk_ptr = OrderBlock(block_index);
int index = first_vacant_data_index;
first_vacant_data_index =
blk_ptr->data[inner_index - 1].next_vacant_data_index;
blk_ptr->data[inner_index - 1].next_vacant_data_index = 0;
blk_ptr->data[inner_index - 1].val = t;
return index;
}
}
void update(T &t, const int index) noexcept {
reallocate();
void *data_begin = virtual_mem + raw_data_begin + sizeofT * (index - 1);
std::memmove(data_begin, &t, sizeofT);
// madvise(data_begin, sizeofT, MADV_FREE);
if (++forced_refresh >= kRefreshThreshold) {
forced_refresh = 0;
ForceRefresh();
}
int block_index = (index - 1) / kDataPerBlock + 1;
int inner_index = (index - 1) % kDataPerBlock + 1;
BlockType *blk_ptr = OrderBlock(block_index);
blk_ptr->data[inner_index - 1].val = t;
}
void read(T &t, const int index) noexcept {
reallocate();
void *data_begin = virtual_mem + raw_data_begin + sizeofT * (index - 1);
std::memmove(&t, data_begin, sizeofT);
// madvise(data_begin, sizeofT, MADV_FREE);
if (++forced_refresh >= kRefreshThreshold) {
forced_refresh = 0;
ForceRefresh();
}
int block_index = (index - 1) / kDataPerBlock + 1;
int inner_index = (index - 1) % kDataPerBlock + 1;
BlockType *blk_ptr = OrderBlock(block_index);
t = blk_ptr->data[inner_index - 1].val;
}
void Delete(int index) noexcept { free_mem.push(index); }
void Delete(int index) noexcept {
int block_index = (index - 1) / kDataPerBlock + 1;
int inner_index = (index - 1) % kDataPerBlock + 1;
BlockType *blk_ptr = OrderBlock(block_index);
blk_ptr->data[inner_index - 1].next_vacant_data_index =
first_vacant_data_index;
first_vacant_data_index = index;
}
};
#endif // BPT_DriveArray_HPP

View File

@ -21,19 +21,19 @@ class String2Index {
}
};
static const int kNodesPerBlock =
(kPageSize - 2 * sizeof(int)) / sizeof(Node);
(kPageSize - 3 * sizeof(int)) / sizeof(Node);
struct Block {
int tot, nxt_idx;
Node data[kNodesPerBlock];
char padding[kPageSize - 2 * sizeof(int) - sizeof(Node) * (kNodesPerBlock)];
char padding[kPageSize - 3 * sizeof(int) - sizeof(Node) * (kNodesPerBlock)];
Block() : tot(0), nxt_idx(0) {}
Block(int _tot, int _nxt_idx) : tot(_tot), nxt_idx(_nxt_idx) {}
};
static_assert(kNodesPerBlock >= 1, "kNodesPerBlock error");
static_assert(sizeof(Block) == kPageSize, "Block Size error");
static_assert(sizeof(Block) == kPageSize - 4, "Block Size error");
DriveArray<Block, kBucketSize, 10> mem;
DriveArray<Block, kBucketSize, 100> mem;
int *hash_table = nullptr;
std::string file_name;
@ -59,10 +59,7 @@ class String2Index {
hash_table = new int[kBucketSize];
// std::memmove(hash_table, mem.RawData(), sizeof(int) * kBucketSize);
// mem.ForceRefresh();
for (int i = 0; i < kBucketSize; i++) {
hash_table[i] = *((int *)(mem.RawData()) + i);
if (i % 4096 == 0) mem.ForceRefresh();
}
mem.LoadInfoTo(hash_table);
}
String2Index(const std::string __file_name) : file_name(__file_name) {
OpenFile(file_name);
@ -78,10 +75,7 @@ class String2Index {
}
~String2Index() {
if (hash_table != nullptr) {
for (int i = 0; i < kBucketSize; i++) {
*((int *)(mem.RawData()) + i) = hash_table[i];
if (i % 4096 == 0) mem.ForceRefresh();
}
mem.WriteInfoFrom(hash_table);
delete[] hash_table;
}
}