Files
RISC-V-Simulator/include/loadstorequeue.h

347 lines
17 KiB
C++

#pragma once
#include <cstdint>
#include <iomanip>
#include "concept.h"
#ifndef LOADSTOREQUEUE_H
#include <array>
#include "debug.h"
#include "tools.h"
namespace ZYM {
// Input ports of the load/store queue (LSQ). Every member is a dark::Wire that
// LoadStoreQueue::work() samples once per simulated cycle. The notes below
// describe only how work() in this file uses each port.
struct LoadStoreQueue_Input {
// receive control signal from CSU
// reset / force_clear_receiver: when either is set, work() empties the queue,
// clears the outgoing request and returns without doing anything else.
dark::Wire<1> reset;
dark::Wire<1> force_clear_receiver;
// An instruction is accepted into the queue when is_issuing is set and
// issue_type == 1.
dark::Wire<1> is_issuing;
dark::Wire<1> issue_type;
// Copied into the new record's ins_ROB_index on accept.
dark::Wire<5> issue_ROB_index;
// Copied into the new record on accept; later matched against the load/store
// encodings when the record is turned into a memory request.
dark::Wire<7 + 3 + 1> full_ins_id;
// Not read by work() in this file.
dark::Wire<32> full_ins;
// Copied into the new record's ins_self_PC on accept.
dark::Wire<32> issuing_PC;
// decoded_rd / has_decoded_rd are not read by work() in this file.
dark::Wire<5> decoded_rd;
dark::Wire<1> has_decoded_rd;
// decoded_rs1 is only referenced from commented-out code; has_decoded_rs1 is
// stored as the record's E1 flag (operand 1 is used).
dark::Wire<5> decoded_rs1;
dark::Wire<1> has_decoded_rs1;
// Sampled the cycle after accept: if rs1_nodep is clear and rs1_is_in_ROB is
// set, rs1_in_ROB_value is taken as operand 1.
dark::Wire<1> rs1_is_in_ROB;
dark::Wire<32> rs1_in_ROB_value;
// Same roles as the rs1 signals above, for operand 2 (stored as E2 / V2).
dark::Wire<5> decoded_rs2;
dark::Wire<1> has_decoded_rs2;
dark::Wire<1> rs2_is_in_ROB;
dark::Wire<32> rs2_in_ROB_value;
// Copied into the new record's ins_imm; added to V1 to form the request address.
dark::Wire<32> decoded_imm;
// receive data from register file
// Sampled the cycle after accept: rsX_nodep set means rsX_value is usable
// immediately; otherwise rsX_deps is the ROB index whose result is awaited.
dark::Wire<1> rs1_nodep;
dark::Wire<5> rs1_deps;
dark::Wire<32> rs1_value;
dark::Wire<1> rs2_nodep;
dark::Wire<5> rs2_deps;
dark::Wire<32> rs2_value;
// data from alu
// A value of 0b10 makes work() treat the two completed_aluins_* ports as a
// result to match against waiting operands.
dark::Wire<2> alu_status_receiver;
dark::Wire<5> completed_aluins_ROB_index;
dark::Wire<32> completed_aluins_result;
// receive status signal from Memory
// 0b10 makes work() treat the two completed_memins_* ports as a result; any
// non-zero value is additionally required before a new request is sent.
dark::Wire<2> mem_data_sign;
dark::Wire<5> completed_memins_ROB_index;
dark::Wire<32> completed_memins_read_data;
// receive status signal from L0 cache
// dark::Wire<1> cache_hit;
// dark::Wire<5> cache_hit_ROB_index;
// dark::Wire<32> cache_hit_data;
};
// Output registers of the load/store queue. They carry the memory request
// produced by LoadStoreQueue::work() plus the queue's free-slot count.
struct LoadStoreQueue_Output {
// request signal, Memory and the L0 cache in ROB will listen to this
// full_ins_id of the record that produced the current request.
dark::Register<7 + 3 + 1> mem_request_full_ins_id;
// 0 means no request this cycle. For the non-zero values written by work(),
// the low two bits are 0b01 for loads and 0b10 for stores, and the high two
// bits are 0b00 / 0b01 / 0b10 for byte / half-word / word accesses.
dark::Register<4> request_type_output;
// ROB index of the instruction the request belongs to.
dark::Register<5> request_ROB_index;
// Address of the access: the record's V1 plus its ins_imm.
dark::Register<32> request_address_output;
// Store data (V2 masked to the access width); only written for stores.
dark::Register<32> request_data_output;
// Mirror of the private LSQ_remain_space counter (0..32 free slots).
dark::Register<6> LSQ_remain_space_output;
};
// One slot of the load/store queue: a pending load or store together with the
// readiness of its (up to two) source operands.
struct LSQ_Record {
dark::Register<2> state; // 0: no, 1: initializing dependency, 2: waiting for data
// Instruction identifier captured from the full_ins_id input on accept.
dark::Register<7 + 3 + 1> full_ins_id;
// Operand values: V1 is added to ins_imm to form the address, V2 is the store data.
dark::Register<32> V1, V2;
// ROB indices whose results operand 1 / operand 2 are waiting for (used while Dx == 0).
dark::Register<5> Q1, Q2;
// Set from has_decoded_rs1 / has_decoded_rs2: the operand is used by this instruction.
dark::Register<1> E1, E2;
dark::Register<1> D1, D2; // 1: no dependency, 0: dependency
// ROB index of this instruction, forwarded with the memory request.
dark::Register<5> ins_ROB_index;
// PC of this instruction; stored on accept, not read back by work() in this file.
dark::Register<32> ins_self_PC;
// Immediate of this instruction, used as the address offset.
dark::Register<32> ins_imm;
};
// Internal state of the load/store queue: a 32-slot circular buffer plus the
// bookkeeping needed to finish operand capture one cycle after an accept.
struct LoadStoreQueue_Private {
// Index of the oldest record (the next one to be sent to memory).
dark::Register<5> LSQ_head;
// Index of the slot the next accepted instruction is written to.
dark::Register<5> LSQ_tail;
// Number of free slots, 0..32. LSQ_head == LSQ_tail happens both when the
// queue is empty (32) and when it is full (0); this counter tells them apart.
dark::Register<6> LSQ_remain_space;
std::array<LSQ_Record, 32> LSQ_queue;
// Set in the cycle an instruction is accepted, so that the following cycle
// reads the register-file / ROB operand information for it.
dark::Register<1> has_accepted_ins_last_cycle;
// Slot index of the instruction accepted in the previous cycle.
dark::Register<5> last_cycle_ins_LSQ_index;
};
/**
 * Load/store queue (LSQ): a 32-slot circular buffer of pending loads and stores.
 *
 * Each call to work() simulates one cycle and performs, in order:
 *   1. flush          - on reset / force_clear_receiver, empty the queue and return;
 *   2. accept         - store a newly issued instruction at LSQ_tail;
 *   3. capture        - for the instruction accepted in the previous cycle, take
 *                       operands from the register file or the ROB;
 *   4. listen         - match ALU / memory results against operands still waited for;
 *   5. dispatch       - if the head record has all operands, emit a memory request.
 *
 * The code relies on the behaviour the original already depended on: a register
 * read inside work() yields the value from before this cycle's `<=` writes (this
 * is why has_accepted_ins_last_cycle can be written in step 2 and still be read
 * as "last cycle" in step 3). Head, tail and the free-slot count are therefore
 * sampled once at the top of work().
 */
struct LoadStoreQueue : public dark::Module<LoadStoreQueue_Input, LoadStoreQueue_Output, LoadStoreQueue_Private> {
  LoadStoreQueue() {
    // Constructor
  }

  /**
   * Simulate one cycle of the queue (see the struct comment for the stages).
   *
   * Throws std::runtime_error("Invalid instruction") if the head record holds an
   * identifier that is not one of the eight supported loads/stores, and, when
   * _DEBUG is defined, std::runtime_error("LSQ_remain_space is out of range") if
   * an instruction is issued while the free-slot counter is 0 or above 32.
   */
  void work() {
    if (bool(reset) || bool(force_clear_receiver)) {
      flush();
      return;
    }

    // Snapshot of the queue geometry for this cycle.
    uint32_t queue_head = static_cast<max_size_t>(LSQ_head);
    uint32_t queue_tail = static_cast<max_size_t>(LSQ_tail);
    uint32_t next_remain_space = static_cast<max_size_t>(LSQ_remain_space);
    // head == tail is ambiguous (empty or full); the free-slot counter decides.
    // Both the traversal below and the dispatch check use this count, so a
    // completely full queue is walked exactly once and can still drain.
    const uint32_t occupied =
        (next_remain_space == 0) ? kQueueCapacity : (queue_tail + kQueueCapacity - queue_head) % kQueueCapacity;

    // ---- accept a newly issued load/store -------------------------------------
    if (bool(is_issuing) && issue_type == 1) {
#ifdef _DEBUG
      if (LSQ_remain_space == 0 || LSQ_remain_space > 32) throw std::runtime_error("LSQ_remain_space is out of range");
#endif
      has_accepted_ins_last_cycle <= 1;
      // now we can accept the instruction, that is, to store it in the LSQ
      uint32_t cur_queue_tail = queue_tail;
      LSQ_Record &slot = LSQ_queue[cur_queue_tail];
      last_cycle_ins_LSQ_index <= cur_queue_tail;
      LSQ_tail <= (cur_queue_tail + 1) % kQueueCapacity;
      next_remain_space--;
      slot.state <= 1;
      slot.full_ins_id <= full_ins_id;
      slot.ins_ROB_index <= issue_ROB_index;
      slot.ins_self_PC <= issuing_PC;
      slot.ins_imm <= decoded_imm;
      slot.E1 <= has_decoded_rs1;
      slot.E2 <= has_decoded_rs2;
      // Placeholder values; the real readiness is decided next cycle, once the
      // register file / ROB answer is available.
      slot.D1 <= 1;
      slot.D2 <= 1;
      DEBUG_CERR << "LoadStoreQueue is accepting instruction" << std::endl;
      DEBUG_CERR << "\tfull_ins_id: " << std::hex << static_cast<max_size_t>(full_ins_id) << std::endl;
      DEBUG_CERR << "\tins_ROB_index: " << std::dec << static_cast<max_size_t>(issue_ROB_index) << std::endl;
      DEBUG_CERR << "\tins_self_PC: " << std::hex << std::setw(8) << std::setfill('0')
                 << static_cast<max_size_t>(issuing_PC) << std::endl;
      DEBUG_CERR << "\tins_imm: " << std::hex << static_cast<max_size_t>(decoded_imm) << std::endl;
      DEBUG_CERR << "\thas_decoded_rs1: " << std::hex << std::setw(8) << std::setfill('0')
                 << static_cast<max_size_t>(has_decoded_rs1) << std::endl;
      DEBUG_CERR << "\thas_decoded_rs2: " << std::hex << std::setw(8) << std::setfill('0')
                 << static_cast<max_size_t>(has_decoded_rs2) << std::endl;
      DEBUG_CERR << "\tstored in positon " << std::dec << static_cast<max_size_t>(cur_queue_tail) << " of LSQ"
                 << std::endl;
    } else {
      has_accepted_ins_last_cycle <= 0;
    }

    // ---- capture operands of the instruction accepted last cycle ---------------
    uint32_t last_idx = static_cast<max_size_t>(last_cycle_ins_LSQ_index);
    const bool accepted_last_cycle = bool(has_accepted_ins_last_cycle);
    bool rs1_captured = false;
    bool rs2_captured = false;
    if (accepted_last_cycle) {
      // now dependency info can be read from the register file, in the mean time, CSU will provide the
      // potentially missing data
      LSQ_Record &fresh = LSQ_queue[last_idx];
      DEBUG_CERR << "LoadStoreQueue is process dependency information from register file and ROB" << std::endl;
      if (bool(fresh.E1) && bool(rs1_nodep)) {
        fresh.V1 <= rs1_value;
        fresh.D1 <= 1;
        rs1_captured = true;
        DEBUG_CERR << "\t from register file: LSQ_queue[last_idx].V1=" << std::hex << std::setw(8)
                   << std::setfill('0') << static_cast<max_size_t>(fresh.V1) << std::endl;
      }
      if (bool(fresh.E2) && bool(rs2_nodep)) {
        fresh.V2 <= rs2_value;
        fresh.D2 <= 1;
        rs2_captured = true;
        DEBUG_CERR << "from register file: LSQ_queue[last_idx].V2=" << std::hex << std::setw(8) << std::setfill('0')
                   << static_cast<max_size_t>(fresh.V2) << std::endl;
      }
      if (bool(fresh.E1) && (!bool(rs1_nodep)) && bool(rs1_is_in_ROB)) {
        fresh.V1 <= rs1_in_ROB_value;
        fresh.D1 <= 1;
        rs1_captured = true;
        DEBUG_CERR << "\t from ROB: LSQ_queue[last_idx].V1=" << std::hex << std::setw(8) << std::setfill('0')
                   << static_cast<max_size_t>(fresh.V1) << std::endl;
      }
      if (bool(fresh.E2) && (!bool(rs2_nodep)) && bool(rs2_is_in_ROB)) {
        fresh.V2 <= rs2_in_ROB_value;
        fresh.D2 <= 1;
        rs2_captured = true;
        DEBUG_CERR << "from ROB: LSQ_queue[last_idx].V2=" << std::hex << std::setw(8) << std::setfill('0')
                   << static_cast<max_size_t>(fresh.V2) << std::endl;
      }
      DEBUG_CERR << "End of processing dependency information from register file and ROB" << std::endl;
    }
    // An operand of last cycle's instruction that is used but was not captured
    // above must still be watched for on the result buses in this very cycle.
    bool should_monitor_V1 = accepted_last_cycle && bool(LSQ_queue[last_idx].E1) && !rs1_captured;
    bool should_monitor_V2 = accepted_last_cycle && bool(LSQ_queue[last_idx].E2) && !rs2_captured;

    // ---- listen to results from ALU and memory ---------------------------------
    // Offers one completed result to every occupied record. Records older than
    // last cycle's accept compare against their stored Q1/Q2; last cycle's record
    // has no Q1/Q2 yet, so it compares against rs1_deps/rs2_deps directly.
    auto offer_result = [&](uint32_t res_ROB_index, uint32_t res_value) -> void {
      DEBUG_CERR << "res_ROB_index=" << std::dec << res_ROB_index << std::endl;
      DEBUG_CERR << "res_value=" << std::hex << std::setw(8) << std::setfill('0') << res_value << std::endl;
      DEBUG_CERR << "rs1_deps=" << std::dec << static_cast<max_size_t>(rs1_deps) << std::endl;
      DEBUG_CERR << "rs2_deps=" << std::dec << static_cast<max_size_t>(rs2_deps) << std::endl;
      uint32_t ptr = queue_head;
      // Bounded by the entry count rather than by "ptr reached tail": with a full
      // queue head == tail, and a pointer comparison alone can never terminate.
      for (uint32_t visited = 0; visited < occupied; ++visited, ptr = (ptr + 1) % kQueueCapacity) {
        DEBUG_CERR << "\tptr=" << std::dec << ptr << std::endl;
        if ((!accepted_last_cycle) || ptr != last_idx) {
          DEBUG_CERR << "\tnormal" << std::endl;
          LSQ_Record &waiting = LSQ_queue[ptr];
          dark::debug::assert(waiting.state == 2, "LSQ_queue[ptr].state != 2");
          if ((!bool(waiting.D1)) && static_cast<max_size_t>(waiting.Q1) == res_ROB_index) {
            waiting.V1 <= res_value;
            waiting.D1 <= 1;
          }
          if ((!bool(waiting.D2)) && static_cast<max_size_t>(waiting.Q2) == res_ROB_index) {
            waiting.V2 <= res_value;
            waiting.D2 <= 1;
          }
        } else {
          DEBUG_CERR << "\timmediately listend data" << std::endl;
          DEBUG_CERR << "should_monitor_V1=" << should_monitor_V1 << std::endl;
          DEBUG_CERR << "should_monitor_V2=" << should_monitor_V2 << std::endl;
          if (should_monitor_V1 && static_cast<max_size_t>(rs1_deps) == res_ROB_index) {
            DEBUG_CERR << "load rs1" << std::endl;
            LSQ_queue[last_idx].V1 <= res_value;
            LSQ_queue[last_idx].D1 <= 1;
            should_monitor_V1 = false;
          }
          if (should_monitor_V2 && static_cast<max_size_t>(rs2_deps) == res_ROB_index) {
            DEBUG_CERR << "load rs2" << std::endl;
            LSQ_queue[last_idx].V2 <= res_value;
            LSQ_queue[last_idx].D2 <= 1;
            should_monitor_V2 = false;
          }
        }
      }
    };
    DEBUG_CERR << "Load Store Queue is listening data from alu" << std::endl;
    if (static_cast<max_size_t>(alu_status_receiver) == 0b10) {
      DEBUG_CERR << "potentially have sth from alu" << std::endl;
      offer_result(static_cast<max_size_t>(completed_aluins_ROB_index),
                   static_cast<max_size_t>(completed_aluins_result));
    }
    DEBUG_CERR << "Load Store Queue is listening data from memory" << std::endl;
    if (static_cast<max_size_t>(mem_data_sign) == 0b10) {
      DEBUG_CERR << "potentially have sth from memory" << std::endl;
      offer_result(static_cast<max_size_t>(completed_memins_ROB_index),
                   static_cast<max_size_t>(completed_memins_read_data));
    }
    // if (static_cast<max_size_t>(cache_hit) == 1) {
    //   process_listend_data(static_cast<max_size_t>(cache_hit_ROB_index), static_cast<max_size_t>(cache_hit_data));
    // }

    // Whatever is still unresolved becomes a recorded dependency on a ROB entry.
    if (should_monitor_V1) {
      LSQ_queue[last_idx].D1 <= 0;
      LSQ_queue[last_idx].Q1 <= rs1_deps;
    }
    if (should_monitor_V2) {
      LSQ_queue[last_idx].D2 <= 0;
      LSQ_queue[last_idx].Q2 <= rs2_deps;
    }
    if (accepted_last_cycle) {
      LSQ_queue[last_idx].state <= 2;
    }

    // ---- dispatch the head record to memory ------------------------------------
    // Requires a non-zero mem_data_sign and that no request was emitted in the
    // previous cycle. NOTE(review): non-zero mem_data_sign presumably means the
    // memory unit is able to take a request - confirm against the Memory module.
    bool can_execute = false;
    const bool may_dispatch =
        static_cast<uint32_t>(mem_data_sign) > 0 && static_cast<max_size_t>(request_type_output) == 0;
    if (may_dispatch && occupied != 0 && LSQ_queue[queue_head].state == 2) {
      LSQ_Record &oldest = LSQ_queue[queue_head];
      // An operand is ready when it is unused (E == 0) or already resolved (D == 1).
      const bool rs1_ready = (!bool(oldest.E1)) || bool(oldest.D1);
      const bool rs2_ready = (!bool(oldest.E2)) || bool(oldest.D2);
      if (rs1_ready && rs2_ready) {
        // now we can execute the instruction
        DEBUG_CERR << "Load Store queue is executing instruction" << std::endl;
        next_remain_space++;
        can_execute = true;
        LSQ_head <= (queue_head + 1) % kQueueCapacity;
        send_memory_request(queue_head);
      }
    }
    if (!can_execute) {
      request_type_output <= 0;
    }
    LSQ_remain_space <= next_remain_space;
    LSQ_remain_space_output <= next_remain_space;
    DEBUG_CERR << "LSQ_queue[16]'s V1: " << std::hex << std::setfill('0') << std::setw(8)
               << static_cast<max_size_t>(LSQ_queue[16].V1) << std::endl;
  }

 private:
  /// Number of slots in LSQ_queue.
  static constexpr uint32_t kQueueCapacity = 32;

  /// Empties the queue and withdraws any outgoing request (reset / force clear).
  void flush() {
    LSQ_remain_space <= kQueueCapacity;
    LSQ_remain_space_output <= kQueueCapacity;
    LSQ_head <= 0;
    LSQ_tail <= 0;
    for (auto &record : LSQ_queue) {
      record.state <= 0;
    }
    has_accepted_ins_last_cycle <= 0;
    request_type_output <= 0;
  }

  /**
   * Writes the memory request for the record in slot `head` to the output
   * registers. request_data_output is written for stores only; loads leave it
   * untouched.
   *
   * The identifiers below have the RISC-V load (0000011) / store (0100011)
   * opcode in their low 7 bits followed by the 3-bit funct3.
   *
   * @throws std::runtime_error if the record is not one of the eight supported
   *         load/store instructions.
   */
  void send_memory_request(uint32_t head) {
    LSQ_Record &rec = LSQ_queue[head];
    uint32_t ins = static_cast<uint32_t>(rec.full_ins_id);
    uint32_t type_code = 0;
    uint32_t store_mask = 0;
    bool is_store = false;
    switch (ins) {
      case 0b00000000011:  // lb
      case 0b01000000011:  // lbu
        type_code = 0b0001;
        break;
      case 0b00010000011:  // lh
      case 0b01010000011:  // lhu
        type_code = 0b0101;
        break;
      case 0b00100000011:  // lw
        type_code = 0b1001;
        break;
      case 0b00000100011:  // sb
        type_code = 0b0010;
        store_mask = 0xFF;
        is_store = true;
        break;
      case 0b00010100011:  // sh
        type_code = 0b0110;
        store_mask = 0xFFFF;
        is_store = true;
        break;
      case 0b00100100011:  // sw
        type_code = 0b1010;
        store_mask = 0xFFFFFFFF;
        is_store = true;
        break;
      default:
        throw std::runtime_error("Invalid instruction");
    }
    mem_request_full_ins_id <= ins;
    request_type_output <= type_code;
    request_ROB_index <= static_cast<uint32_t>(rec.ins_ROB_index);
    request_address_output <= (static_cast<uint32_t>(rec.V1) + static_cast<uint32_t>(rec.ins_imm));
    if (ins == 0b00100100011) {
      // Extra trace for sw only, as in the original implementation.
      DEBUG_CERR << "\trequest_address_output=" << std::hex << std::setfill('0') << std::setw(8)
                 << request_address_output.peek() << std::endl;
      DEBUG_CERR << "\toperand1=" << std::hex << std::setfill('0') << std::setw(8) << static_cast<uint32_t>(rec.V1)
                 << std::endl;
      DEBUG_CERR << "\timm=" << std::hex << std::setfill('0') << std::setw(8)
                 << static_cast<uint32_t>(rec.ins_imm) << std::endl;
      DEBUG_CERR << "\tROB_index=" << std::dec << static_cast<uint32_t>(rec.ins_ROB_index) << std::endl;
    }
    if (is_store) {
      request_data_output <= (static_cast<uint32_t>(rec.V2) & store_mask);
    }
  }
};
} // namespace ZYM
#endif