docs: optimize drivearray
This commit is contained in:
@ -1,181 +1,203 @@
|
|||||||
#ifndef BPT_DriveArray_HPP
|
#ifndef BPT_DriveArray_HPP
|
||||||
#define BPT_DriveArray_HPP
|
#define BPT_DriveArray_HPP
|
||||||
|
|
||||||
#include <fcntl.h>
|
|
||||||
#include <sys/mman.h>
|
|
||||||
#include <sys/stat.h>
|
|
||||||
#include <sys/types.h>
|
|
||||||
#include <unistd.h>
|
|
||||||
|
|
||||||
#include <algorithm>
|
#include <algorithm>
|
||||||
#include <cstring>
|
#include <cstring>
|
||||||
#include <mutex>
|
#include <fstream>
|
||||||
|
#include <queue>
|
||||||
#include <stack>
|
#include <stack>
|
||||||
#include <string>
|
#include <string>
|
||||||
|
#include <unordered_map>
|
||||||
template <class T, const int info_len = 2, const int kRefreshThreshold = 100>
|
template <class T, const int info_len = 2, const int kBufSize = 100>
|
||||||
class DriveArray {
|
class DriveArray {
|
||||||
private:
|
private:
|
||||||
static const int kPageSize = 4096;
|
static const int kPageSize = 4096;
|
||||||
int file_descriptor = -1;
|
static const int kDataBiginOffset =
|
||||||
size_t file_length = 0;
|
|
||||||
void *virtual_mem;
|
|
||||||
std::string file_name;
|
|
||||||
const int sizeofT = sizeof(T);
|
|
||||||
const int raw_data_begin =
|
|
||||||
((info_len + 2) * sizeof(int) + kPageSize - 1) / kPageSize * kPageSize;
|
((info_len + 2) * sizeof(int) + kPageSize - 1) / kPageSize * kPageSize;
|
||||||
std::stack<int> free_mem;
|
static const int sizeofT = sizeof(T);
|
||||||
int total_mem = 0;
|
struct DataType {
|
||||||
unsigned int forced_refresh = 0;
|
int next_vacant_data_index;
|
||||||
void reallocate(bool include_resync = false) noexcept {
|
T val;
|
||||||
size_t length_needed =
|
};
|
||||||
raw_data_begin +
|
static const int kBlockSize =
|
||||||
(sizeofT * total_mem + kPageSize - 1) / kPageSize * kPageSize;
|
(sizeof(DataType) + kPageSize - 1) / kPageSize * kPageSize;
|
||||||
if (include_resync)
|
static const int kDataPerBlock = kBlockSize / sizeof(DataType);
|
||||||
length_needed += (free_mem.size() * sizeof(int) + kPageSize - 1) /
|
struct BlockType {
|
||||||
kPageSize * kPageSize;
|
DataType data[kDataPerBlock];
|
||||||
if (file_length >= length_needed) return;
|
};
|
||||||
munmap(virtual_mem, file_length);
|
char rest[kBlockSize - sizeof(BlockType)];
|
||||||
file_length = std::max(file_length * 2, length_needed);
|
static_assert(kBlockSize % kPageSize == 0, "kBlockSize % kPageSize != 0");
|
||||||
ftruncate(file_descriptor, file_length);
|
std::string file_name;
|
||||||
virtual_mem = mmap(nullptr, file_length, PROT_READ | PROT_WRITE, MAP_SHARED,
|
int total_block_number = 0, first_vacant_data_index = 0;
|
||||||
file_descriptor, 0);
|
/**
|
||||||
|
* DataIndex = (BlockIndex - 1) * kDataPerBlock + InnerIndex (all three are 1-based)
|
||||||
|
*/
|
||||||
|
std::unordered_map<int, BlockType *> cache;
|
||||||
|
std::queue<int> vis_que;
|
||||||
|
/**
 * Reads one block from the backing file into a newly allocated BlockType,
 * registers it in `cache` under `block_index`, and appends `block_index` to
 * `vis_que`.
 *
 * The byte offset is computed in std::streamoff. Doing the multiplication
 * `(block_index - 1) * kBlockSize` in plain int overflows as soon as the
 * offset exceeds INT_MAX, which would make the seek land on the wrong block.
 *
 * @param block_index 1-based index of the block to load.
 */
void LoadCache(int block_index) {
  const std::streamoff offset =
      static_cast<std::streamoff>(kDataBiginOffset) +
      static_cast<std::streamoff>(block_index - 1) * kBlockSize;

  BlockType *tmp = new BlockType;
  fs.seekg(offset, std::ios::beg);
  fs.read(reinterpret_cast<char *>(tmp), sizeof(BlockType));

  // The cache map owns the allocation until ReleaseOldestCache deletes it.
  cache[block_index] = tmp;
  vis_que.push(block_index);
}
|
||||||
|
void ReleaseOldestCache() {
|
||||||
|
int block_index = vis_que.front();
|
||||||
|
vis_que.pop();
|
||||||
|
fs.seekp(kDataBiginOffset + (block_index - 1) * kBlockSize, std::ios::beg);
|
||||||
|
fs.write(reinterpret_cast<char *>(cache[block_index]), sizeof(BlockType));
|
||||||
|
delete cache[block_index];
|
||||||
|
cache.erase(block_index);
|
||||||
|
}
|
||||||
|
/**
 * Returns the cached in-memory copy of a block, loading it first if needed.
 *
 * On a hit the existing pointer is returned. On a miss, if the cache currently
 * holds exactly kBufSize blocks, ReleaseOldestCache() is called once before
 * LoadCache() brings the requested block in.
 *
 * @param block_index 1-based index of the requested block.
 * @return Pointer stored in `cache` for `block_index`.
 */
BlockType *OrderBlock(int block_index) {
  const auto hit = cache.find(block_index);
  if (hit != cache.end()) {
    return hit->second;
  }

  if (cache.size() == kBufSize) {
    ReleaseOldestCache();
  }

  LoadCache(block_index);
  return cache[block_index];
}
|
||||||
|
int AppEndBlock() {
|
||||||
|
fs.seekp(0, std::ios::end);
|
||||||
|
BlockType tmp;
|
||||||
|
fs.write(reinterpret_cast<char *>(&tmp), sizeof(BlockType));
|
||||||
|
fs.write(rest, kBlockSize - sizeof(BlockType));
|
||||||
|
++total_block_number;
|
||||||
|
return total_block_number;
|
||||||
}
|
}
|
||||||
|
|
||||||
public:
|
public:
|
||||||
|
std::fstream fs;
|
||||||
DriveArray() = default;
|
DriveArray() = default;
|
||||||
inline bool IsOpen() const noexcept { return file_descriptor >= 0; }
|
DriveArray(const DriveArray &) = delete;
|
||||||
~DriveArray() {
|
DriveArray &operator=(const DriveArray &) = delete;
|
||||||
if (file_descriptor >= 0) {
|
|
||||||
reallocate(true);
|
|
||||||
int stk_data_begin =
|
|
||||||
raw_data_begin +
|
|
||||||
(sizeofT * total_mem + kPageSize - 1) / kPageSize * kPageSize;
|
|
||||||
*((int *)(virtual_mem) + info_len) = total_mem;
|
|
||||||
*((int *)(virtual_mem) + info_len + 1) = (int)free_mem.size();
|
|
||||||
int *p = (int *)(virtual_mem + stk_data_begin);
|
|
||||||
while (!free_mem.empty()) {
|
|
||||||
*(p++) = free_mem.top();
|
|
||||||
free_mem.pop();
|
|
||||||
}
|
|
||||||
munmap(virtual_mem, file_length);
|
|
||||||
close(file_descriptor);
|
|
||||||
file_descriptor = -1;
|
|
||||||
}
|
|
||||||
}
|
|
||||||
bool operator=(const DriveArray &) = delete;
|
|
||||||
void ForceRefresh() noexcept {
|
|
||||||
munmap(virtual_mem, file_length);
|
|
||||||
virtual_mem = mmap(nullptr, file_length, PROT_READ | PROT_WRITE, MAP_SHARED,
|
|
||||||
file_descriptor, 0);
|
|
||||||
}
|
|
||||||
void *RawData() noexcept { return virtual_mem; }
|
|
||||||
void OpenFile(const std::string &file_name) {
|
|
||||||
if (file_name == "") return;
|
|
||||||
if (file_descriptor >= 0) {
|
|
||||||
munmap(virtual_mem, file_length);
|
|
||||||
close(file_descriptor);
|
|
||||||
file_descriptor = -1;
|
|
||||||
}
|
|
||||||
file_descriptor =
|
|
||||||
open(file_name.c_str(), O_CREAT | O_RDWR, S_IRUSR | S_IWUSR);
|
|
||||||
struct stat file_state;
|
|
||||||
fstat(file_descriptor, &file_state);
|
|
||||||
file_length = file_state.st_size;
|
|
||||||
if (file_length == 0) {
|
|
||||||
file_length = 1024 * 1024;
|
|
||||||
file_length =
|
|
||||||
std::max(file_length, ((info_len + 2) * sizeof(int) + kPageSize - 1) /
|
|
||||||
kPageSize * kPageSize);
|
|
||||||
ftruncate(file_descriptor, file_length);
|
|
||||||
}
|
|
||||||
virtual_mem = mmap(nullptr, file_length, PROT_READ | PROT_WRITE, MAP_SHARED,
|
|
||||||
file_descriptor, 0);
|
|
||||||
total_mem = *((int *)(virtual_mem) + info_len);
|
|
||||||
int free_mem_cnt = *((int *)(virtual_mem) + info_len + 1);
|
|
||||||
int stk_data_begin =
|
|
||||||
raw_data_begin +
|
|
||||||
(sizeofT * total_mem + kPageSize - 1) / kPageSize * kPageSize;
|
|
||||||
int *p = (int *)(virtual_mem + stk_data_begin);
|
|
||||||
for (int i = 0; i < free_mem_cnt; i++) {
|
|
||||||
free_mem.push(*(p++));
|
|
||||||
}
|
|
||||||
}
|
|
||||||
DriveArray(const std::string &file_name) : file_name(file_name) {
|
DriveArray(const std::string &file_name) : file_name(file_name) {
|
||||||
OpenFile(file_name);
|
OpenFile(file_name);
|
||||||
}
|
}
|
||||||
|
|
||||||
void initialise(std::string FN = "") {
|
inline bool IsOpen() const noexcept { return fs.is_open(); }
|
||||||
if (FN != "") file_name = FN;
|
~DriveArray() { CloseFile(); }
|
||||||
if (file_descriptor >= 0) {
|
void CloseFile() {
|
||||||
munmap(virtual_mem, file_length);
|
if (!fs.is_open()) return;
|
||||||
close(file_descriptor);
|
while (cache.size() > 0) ReleaseOldestCache();
|
||||||
file_descriptor = -1;
|
fs.seekp(sizeof(int) * info_len, std::ios::beg);
|
||||||
|
fs.write(reinterpret_cast<char *>(&first_vacant_data_index), sizeof(int));
|
||||||
|
fs.write(reinterpret_cast<char *>(&total_block_number), sizeof(int));
|
||||||
|
fs.close();
|
||||||
|
file_name = "";
|
||||||
|
first_vacant_data_index = 0;
|
||||||
|
total_block_number = 0;
|
||||||
}
|
}
|
||||||
file_descriptor =
|
void OpenFile(const std::string &__file_name) {
|
||||||
open(file_name.c_str(), O_CREAT | O_RDWR, S_IRUSR | S_IWUSR);
|
if (fs.is_open()) CloseFile();
|
||||||
file_length = 1024 * 1024;
|
file_name = __file_name;
|
||||||
file_length =
|
fs.open(file_name, std::ios::in | std::ios::out | std::ios::binary);
|
||||||
std::max(file_length, ((info_len + 2) * sizeof(int) + kPageSize - 1) /
|
if (!fs.is_open()) {
|
||||||
kPageSize * kPageSize);
|
fs.open(file_name, std::ios::out | std::ios::binary);
|
||||||
ftruncate(file_descriptor, file_length);
|
fs.seekp(0, std::ios::beg);
|
||||||
virtual_mem = mmap(nullptr, file_length, PROT_READ | PROT_WRITE, MAP_SHARED,
|
int tmp = 0;
|
||||||
file_descriptor, 0);
|
total_block_number = 0;
|
||||||
for (int i = 0; i < info_len; i++) *((int *)(virtual_mem) + i) = 0;
|
first_vacant_data_index = 0;
|
||||||
|
for (int i = 0; i < kDataBiginOffset / sizeof(int); ++i) {
|
||||||
|
fs.write(reinterpret_cast<char *>(&tmp), sizeof(int));
|
||||||
|
}
|
||||||
|
fs.close();
|
||||||
|
fs.open(file_name, std::ios::in | std::ios::out | std::ios::binary);
|
||||||
|
}
|
||||||
|
fs.seekg(sizeof(int) * info_len, std::ios::beg);
|
||||||
|
fs.read(reinterpret_cast<char *>(&first_vacant_data_index), sizeof(int));
|
||||||
|
fs.read(reinterpret_cast<char *>(&total_block_number), sizeof(int));
|
||||||
|
}
|
||||||
|
void initialise(std::string FN = "") {
|
||||||
|
if (fs.is_open()) {
|
||||||
|
std::string name_bak = file_name;
|
||||||
|
CloseFile();
|
||||||
|
file_name = name_bak;
|
||||||
|
}
|
||||||
|
if (FN != "") file_name = FN;
|
||||||
|
if (file_name == "") return;
|
||||||
|
fs.open(file_name, std::ios::out | std::ios::binary);
|
||||||
|
fs.seekp(0, std::ios::beg);
|
||||||
|
int tmp = 0;
|
||||||
|
total_block_number = 0;
|
||||||
|
first_vacant_data_index = 0;
|
||||||
|
for (int i = 0; i < kDataBiginOffset / sizeof(int); ++i) {
|
||||||
|
fs.write(reinterpret_cast<char *>(&tmp), sizeof(int));
|
||||||
|
}
|
||||||
|
fs.close();
|
||||||
|
fs.open(file_name, std::ios::in | std::ios::out | std::ios::binary);
|
||||||
}
|
}
|
||||||
|
|
||||||
void get_info(int &tmp, int n) noexcept {
|
void get_info(int &tmp, int n) noexcept {
|
||||||
if (n > info_len) return;
|
if (n > info_len) return;
|
||||||
tmp = *((int *)(virtual_mem) + n - 1);
|
fs.seekg((n - 1) * sizeof(int), std::ios::beg);
|
||||||
if (++forced_refresh >= kRefreshThreshold) {
|
fs.read(reinterpret_cast<char *>(&tmp), sizeof(int));
|
||||||
forced_refresh = 0;
|
|
||||||
ForceRefresh();
|
|
||||||
}
|
|
||||||
}
|
}
|
||||||
|
|
||||||
void write_info(int tmp, int n) noexcept {
|
void write_info(int tmp, int n) noexcept {
|
||||||
if (n > info_len) return;
|
if (n > info_len) return;
|
||||||
*((int *)(virtual_mem) + n - 1) = tmp;
|
fs.seekp((n - 1) * sizeof(int), std::ios::beg);
|
||||||
if (++forced_refresh >= kRefreshThreshold) {
|
fs.write(reinterpret_cast<char *>(&tmp), sizeof(int));
|
||||||
forced_refresh = 0;
|
|
||||||
ForceRefresh();
|
|
||||||
}
|
}
|
||||||
|
|
||||||
|
void LoadInfoTo(int *dest) {
|
||||||
|
fs.seekg(0, std::ios::beg);
|
||||||
|
fs.read(reinterpret_cast<char *>(dest), sizeof(int) * info_len);
|
||||||
|
}
|
||||||
|
|
||||||
|
void WriteInfoFrom(int *src) {
|
||||||
|
fs.seekp(0, std::ios::beg);
|
||||||
|
fs.write(reinterpret_cast<char *>(src), sizeof(int) * info_len);
|
||||||
}
|
}
|
||||||
|
|
||||||
int write(T &t) noexcept {
|
int write(T &t) noexcept {
|
||||||
int index = -1;
|
if (first_vacant_data_index == 0) {
|
||||||
if (!free_mem.empty()) {
|
int new_block_index = AppEndBlock();
|
||||||
index = free_mem.top();
|
int index = (new_block_index - 1) * kDataPerBlock + 1;
|
||||||
free_mem.pop();
|
BlockType *blk_ptr = OrderBlock(new_block_index);
|
||||||
} else
|
if (kDataPerBlock > 1)
|
||||||
index = ++total_mem;
|
first_vacant_data_index = index + 1;
|
||||||
update(t, index);
|
else
|
||||||
|
first_vacant_data_index = 0;
|
||||||
|
for (int i = 1; i < kDataPerBlock - 1; i++)
|
||||||
|
blk_ptr->data[i].next_vacant_data_index = index + i + 1;
|
||||||
|
blk_ptr->data[kDataPerBlock - 1].next_vacant_data_index = 0;
|
||||||
|
blk_ptr->data[0].next_vacant_data_index = 0;
|
||||||
|
blk_ptr->data[0].val = t;
|
||||||
return index;
|
return index;
|
||||||
|
} else {
|
||||||
|
int block_index = (first_vacant_data_index - 1) / kDataPerBlock + 1;
|
||||||
|
int inner_index = (first_vacant_data_index - 1) % kDataPerBlock + 1;
|
||||||
|
BlockType *blk_ptr = OrderBlock(block_index);
|
||||||
|
int index = first_vacant_data_index;
|
||||||
|
first_vacant_data_index =
|
||||||
|
blk_ptr->data[inner_index - 1].next_vacant_data_index;
|
||||||
|
blk_ptr->data[inner_index - 1].next_vacant_data_index = 0;
|
||||||
|
blk_ptr->data[inner_index - 1].val = t;
|
||||||
|
return index;
|
||||||
|
}
|
||||||
}
|
}
|
||||||
|
|
||||||
void update(T &t, const int index) noexcept {
|
void update(T &t, const int index) noexcept {
|
||||||
reallocate();
|
int block_index = (index - 1) / kDataPerBlock + 1;
|
||||||
void *data_begin = virtual_mem + raw_data_begin + sizeofT * (index - 1);
|
int inner_index = (index - 1) % kDataPerBlock + 1;
|
||||||
std::memmove(data_begin, &t, sizeofT);
|
BlockType *blk_ptr = OrderBlock(block_index);
|
||||||
// madvise(data_begin, sizeofT, MADV_FREE);
|
blk_ptr->data[inner_index - 1].val = t;
|
||||||
if (++forced_refresh >= kRefreshThreshold) {
|
|
||||||
forced_refresh = 0;
|
|
||||||
ForceRefresh();
|
|
||||||
}
|
|
||||||
}
|
}
|
||||||
|
|
||||||
void read(T &t, const int index) noexcept {
|
void read(T &t, const int index) noexcept {
|
||||||
reallocate();
|
int block_index = (index - 1) / kDataPerBlock + 1;
|
||||||
void *data_begin = virtual_mem + raw_data_begin + sizeofT * (index - 1);
|
int inner_index = (index - 1) % kDataPerBlock + 1;
|
||||||
std::memmove(&t, data_begin, sizeofT);
|
BlockType *blk_ptr = OrderBlock(block_index);
|
||||||
// madvise(data_begin, sizeofT, MADV_FREE);
|
t = blk_ptr->data[inner_index - 1].val;
|
||||||
if (++forced_refresh >= kRefreshThreshold) {
|
|
||||||
forced_refresh = 0;
|
|
||||||
ForceRefresh();
|
|
||||||
}
|
|
||||||
}
|
}
|
||||||
|
|
||||||
void Delete(int index) noexcept { free_mem.push(index); }
|
void Delete(int index) noexcept {
|
||||||
|
int block_index = (index - 1) / kDataPerBlock + 1;
|
||||||
|
int inner_index = (index - 1) % kDataPerBlock + 1;
|
||||||
|
BlockType *blk_ptr = OrderBlock(block_index);
|
||||||
|
blk_ptr->data[inner_index - 1].next_vacant_data_index =
|
||||||
|
first_vacant_data_index;
|
||||||
|
first_vacant_data_index = index;
|
||||||
|
}
|
||||||
};
|
};
|
||||||
|
|
||||||
#endif // BPT_DriveArray_HPP
|
#endif // BPT_DriveArray_HPP
|
@ -21,19 +21,19 @@ class String2Index {
|
|||||||
}
|
}
|
||||||
};
|
};
|
||||||
static const int kNodesPerBlock =
|
static const int kNodesPerBlock =
|
||||||
(kPageSize - 2 * sizeof(int)) / sizeof(Node);
|
(kPageSize - 3 * sizeof(int)) / sizeof(Node);
|
||||||
|
|
||||||
struct Block {
|
struct Block {
|
||||||
int tot, nxt_idx;
|
int tot, nxt_idx;
|
||||||
Node data[kNodesPerBlock];
|
Node data[kNodesPerBlock];
|
||||||
char padding[kPageSize - 2 * sizeof(int) - sizeof(Node) * (kNodesPerBlock)];
|
char padding[kPageSize - 3 * sizeof(int) - sizeof(Node) * (kNodesPerBlock)];
|
||||||
Block() : tot(0), nxt_idx(0) {}
|
Block() : tot(0), nxt_idx(0) {}
|
||||||
Block(int _tot, int _nxt_idx) : tot(_tot), nxt_idx(_nxt_idx) {}
|
Block(int _tot, int _nxt_idx) : tot(_tot), nxt_idx(_nxt_idx) {}
|
||||||
};
|
};
|
||||||
static_assert(kNodesPerBlock >= 1, "kNodesPerBlock error");
|
static_assert(kNodesPerBlock >= 1, "kNodesPerBlock error");
|
||||||
static_assert(sizeof(Block) == kPageSize, "Block Size error");
|
static_assert(sizeof(Block) == kPageSize - 4, "Block Size error");
|
||||||
|
|
||||||
DriveArray<Block, kBucketSize, 10> mem;
|
DriveArray<Block, kBucketSize, 100> mem;
|
||||||
int *hash_table = nullptr;
|
int *hash_table = nullptr;
|
||||||
std::string file_name;
|
std::string file_name;
|
||||||
|
|
||||||
@ -59,10 +59,7 @@ class String2Index {
|
|||||||
hash_table = new int[kBucketSize];
|
hash_table = new int[kBucketSize];
|
||||||
// std::memmove(hash_table, mem.RawData(), sizeof(int) * kBucketSize);
|
// std::memmove(hash_table, mem.RawData(), sizeof(int) * kBucketSize);
|
||||||
// mem.ForceRefresh();
|
// mem.ForceRefresh();
|
||||||
for (int i = 0; i < kBucketSize; i++) {
|
mem.LoadInfoTo(hash_table);
|
||||||
hash_table[i] = *((int *)(mem.RawData()) + i);
|
|
||||||
if (i % 4096 == 0) mem.ForceRefresh();
|
|
||||||
}
|
|
||||||
}
|
}
|
||||||
String2Index(const std::string __file_name) : file_name(__file_name) {
|
String2Index(const std::string __file_name) : file_name(__file_name) {
|
||||||
OpenFile(file_name);
|
OpenFile(file_name);
|
||||||
@ -78,10 +75,7 @@ class String2Index {
|
|||||||
}
|
}
|
||||||
~String2Index() {
|
~String2Index() {
|
||||||
if (hash_table != nullptr) {
|
if (hash_table != nullptr) {
|
||||||
for (int i = 0; i < kBucketSize; i++) {
|
mem.WriteInfoFrom(hash_table);
|
||||||
*((int *)(mem.RawData()) + i) = hash_table[i];
|
|
||||||
if (i % 4096 == 0) mem.ForceRefresh();
|
|
||||||
}
|
|
||||||
delete[] hash_table;
|
delete[] hash_table;
|
||||||
}
|
}
|
||||||
}
|
}
|
||||||
|
Reference in New Issue
Block a user