From 173a2904e52155e041db734fea68b1ba6eb76e99 Mon Sep 17 00:00:00 2001 From: ZhuangYumin Date: Wed, 31 Jul 2024 08:30:02 +0000 Subject: [PATCH] finish LSB --- include/csu.h | 1 + include/loadstorequeue.h | 196 +++++++++++++++++++++++++++++++++++++-- include/memory.h | 5 +- include/register.h | 4 + src/main.cpp | 2 + 5 files changed, 200 insertions(+), 8 deletions(-) diff --git a/include/csu.h b/include/csu.h index 8251c78..cfe5210 100644 --- a/include/csu.h +++ b/include/csu.h @@ -20,6 +20,7 @@ struct CentralScheduleUnit_Input { dark::Wire<5> completed_memins_ROB_index; dark::Wire<32> completed_memins_read_data; // data from LoadStoreQueue + dark::Wire<7 + 3 + 1> mem_request_full_ins_id; dark::Wire<4> mem_request_type_input; dark::Wire<32> mem_address_input; dark::Wire<32> mem_data_input; diff --git a/include/loadstorequeue.h b/include/loadstorequeue.h index dc439ff..952cf86 100644 --- a/include/loadstorequeue.h +++ b/include/loadstorequeue.h @@ -1,6 +1,9 @@ #pragma once +#include +#include "concept.h" #ifndef LOADSTOREQUEUE_H #include +#include "debug.h" #include "tools.h" namespace ZYM { struct LoadStoreQueue_Input { @@ -46,6 +49,7 @@ struct LoadStoreQueue_Input { }; struct LoadStoreQueue_Output { // request signal, Memory and the L0 cache in ROB will listen to this + dark::Register<7 + 3 + 1> mem_request_full_ins_id; dark::Register<4> request_type_output; dark::Register<5> request_ROB_index; dark::Register<32> request_address_output; @@ -55,12 +59,13 @@ struct LoadStoreQueue_Output { struct LSQ_Record { dark::Register<2> state; // 0: no, 1: initializing dependency, 2: waiting for data dark::Register<7 + 3 + 1> full_ins_id; - dark::Register<32> Vj, Vk; - dark::Register<5> Qj, Qk; + dark::Register<32> V1, V2; + dark::Register<5> Q1, Q2; + dark::Register<1> E1, E2; + dark::Register<1> D1, D2; // 1: no dependency, 0: dependency dark::Register<5> ins_ROB_index; dark::Register<32> ins_self_PC; dark::Register<32> ins_imm; - dark::Register<32> addr; }; struct LoadStoreQueue_Private { dark::Register<5> LSQ_head; @@ -77,38 +82,217 @@ struct LoadStoreQueue : public dark::Module(LSQ_remain_space); if (bool(is_issuing) && issue_type == 1) { #ifdef _DEBUG if (LSQ_remain_space == 0 || LSQ_remain_space > 32) throw std::runtime_error("LSQ_remain_space is out of range"); #endif has_accepted_ins_last_cycle <= 1; - // TODO: now we can accept the instruction, that is, to store it in the LSQ + // now we can accept the instruction, that is, to store it in the LSQ + uint32_t cur_queue_tail = static_cast(LSQ_tail); + last_cycle_ins_LSQ_index <= cur_queue_tail; + LSQ_tail <= (cur_queue_tail + 1) % 32; + next_remain_space--; + LSQ_queue[cur_queue_tail].state <= 1; + LSQ_queue[cur_queue_tail].full_ins_id <= full_ins_id; + LSQ_queue[cur_queue_tail].ins_ROB_index <= issue_ROB_index; + LSQ_queue[cur_queue_tail].ins_self_PC <= issuing_PC; + LSQ_queue[cur_queue_tail].ins_imm <= decoded_imm; + LSQ_queue[cur_queue_tail].E1 <= has_decoded_rs1; + LSQ_queue[cur_queue_tail].E2 <= has_decoded_rs2; + LSQ_queue[cur_queue_tail].D1 <= 1; // temporarily + LSQ_queue[cur_queue_tail].D2 <= 1; // temporarily + // LSQ_queue[cur_queue_tail].Q1 <= decoded_rs1; // temporarily, no use + // LSQ_queue[cur_queue_tail].Q2 <= decoded_rs2; // temporarily, no use } else has_accepted_ins_last_cycle <= 0; + uint32_t last_idx = static_cast(last_cycle_ins_LSQ_index); + bool last_cycle_V1_proccessed = false; + bool last_cycle_V2_proccessed = false; if (bool(has_accepted_ins_last_cycle)) { - // TODO: now dependency info can be read from the register file, in the mean time, CSU will provide the + // now dependency info can be read from the register file, in the mean time, CSU will provide the // potentially missing data + if (bool(LSQ_queue[last_idx].E1) && bool(rs1_nodep)) { + LSQ_queue[last_idx].V1 <= rs1_value; + LSQ_queue[last_idx].D1 <= 1; + last_cycle_V1_proccessed = true; + } + if (bool(LSQ_queue[last_idx].E2) && bool(rs2_nodep)) { + LSQ_queue[last_idx].V2 <= rs2_value; + LSQ_queue[last_idx].D2 <= 1; + last_cycle_V2_proccessed = true; + } + if (bool(LSQ_queue[last_idx].E1) && (!bool(rs1_nodep)) && bool(rs1_is_in_ROB)) { + LSQ_queue[last_idx].V1 <= rs1_in_ROB_value; + LSQ_queue[last_idx].D1 <= 1; + last_cycle_V1_proccessed = true; + } + if (bool(LSQ_queue[last_idx].E2) && (!bool(rs2_nodep)) && bool(rs2_is_in_ROB)) { + LSQ_queue[last_idx].V2 <= rs2_in_ROB_value; + LSQ_queue[last_idx].D2 <= 1; + last_cycle_V2_proccessed = true; + } + } + bool should_monitor_V1 = + bool(has_accepted_ins_last_cycle) && bool(LSQ_queue[last_idx].E1) && !last_cycle_V1_proccessed; + bool should_monitor_V2 = + bool(has_accepted_ins_last_cycle) && bool(LSQ_queue[last_idx].E2) && !last_cycle_V2_proccessed; + // now alu, memory (and L0 cache of memory) may provide data to satisfy the dependency + auto process_listend_data = [&](uint32_t res_ROB_index, uint32_t res_value) -> void { + uint32_t ptr = static_cast(LSQ_head); + while (ptr != static_cast(LSQ_tail)) { + if ((!bool(has_accepted_ins_last_cycle)) || ptr != last_idx) { + dark::debug::assert(LSQ_queue[ptr].state == 2, "LSQ_queue[ptr].state != 2"); + if (static_cast(LSQ_queue[ptr].Q1) == res_ROB_index) { + LSQ_queue[ptr].V1 <= res_value; + LSQ_queue[ptr].D1 <= 1; + } + if (static_cast(LSQ_queue[ptr].Q2) == res_ROB_index) { + LSQ_queue[ptr].V2 <= res_value; + LSQ_queue[ptr].D2 <= 1; + } + } else { + if (should_monitor_V1 && static_cast(rs1_deps) == res_ROB_index) { + LSQ_queue[last_idx].V1 <= res_value; + LSQ_queue[last_idx].D1 <= 1; + should_monitor_V1 = false; + } + if (should_monitor_V2 && static_cast(rs2_deps) == res_ROB_index) { + LSQ_queue[last_idx].V2 <= res_value; + LSQ_queue[last_idx].D2 <= 1; + should_monitor_V2 = false; + } + } + ptr = (ptr + 1) % 32; + } + }; + if (static_cast(alu_status_receiver) == 0b10) { + process_listend_data(static_cast(completed_aluins_ROB_index), + static_cast(completed_aluins_result)); + } + if (static_cast(mem_data_sign) == 0b10) { + process_listend_data(static_cast(completed_memins_ROB_index), + static_cast(completed_memins_read_data)); + } + if (static_cast(cache_hit) == 1) { + process_listend_data(static_cast(cache_hit_ROB_index), static_cast(cache_hit_data)); + } + if (should_monitor_V1) { + LSQ_queue[last_idx].D1 <= 0; + LSQ_queue[last_idx].Q1 <= rs1_deps; + } + if (should_monitor_V2) { + LSQ_queue[last_idx].D2 <= 0; + LSQ_queue[last_idx].Q2 <= rs2_deps; } - // TODO: now alu, memory (and L0 cache of memory) may provide data to satisfy the dependency // TODO: now, we can check if we can execute the instruction, memory and L0 cache will listen to this + // other data + if (bool(has_accepted_ins_last_cycle)) LSQ_queue[last_idx].state <= 2; + bool can_execute = false; + if (static_cast(mem_data_sign) > 0) { + if (static_cast(LSQ_head) != static_cast(LSQ_tail)) { + uint32_t head = static_cast(LSQ_head); + if (LSQ_queue[head].state.peek() == 2) { + if (((LSQ_queue[head].E1.peek() == 0) || + (LSQ_queue[head].E1.peek() == 1 && LSQ_queue[head].D1.peek() == 1)) && + ((LSQ_queue[head].E2.peek() == 0) || + (LSQ_queue[head].E2.peek() == 1 && LSQ_queue[head].D2.peek() == 1))) { + // now we can execute the instruction + next_remain_space--; + can_execute = true; + LSQ_head <= (head + 1) % 32; + uint32_t ins = static_cast(LSQ_queue[head].full_ins_id); + if (ins == 0b00000000011) { + // lb + mem_request_full_ins_id <= ins; + request_type_output <= 0b0001; + request_ROB_index <= static_cast(LSQ_queue[head].ins_ROB_index); + request_address_output <= + (static_cast(LSQ_queue[head].V1) + static_cast(LSQ_queue[head].ins_imm)); + } else if (ins == 0b00010000011) { + // lh + mem_request_full_ins_id <= ins; + request_type_output <= 0b0101; + request_ROB_index <= static_cast(LSQ_queue[head].ins_ROB_index); + request_address_output <= + (static_cast(LSQ_queue[head].V1) + static_cast(LSQ_queue[head].ins_imm)); + } else if (ins == 0b00100000011) { + // lw + mem_request_full_ins_id <= ins; + request_type_output <= 0b1001; + request_ROB_index <= static_cast(LSQ_queue[head].ins_ROB_index); + request_address_output <= + (static_cast(LSQ_queue[head].V1) + static_cast(LSQ_queue[head].ins_imm)); + } else if (ins == 0b01000000011) { + // lbu + mem_request_full_ins_id <= ins; + request_type_output <= 0b0001; + request_ROB_index <= static_cast(LSQ_queue[head].ins_ROB_index); + request_address_output <= + (static_cast(LSQ_queue[head].V1) + static_cast(LSQ_queue[head].ins_imm)); + } else if (ins == 0b01010000011) { + // lhu + mem_request_full_ins_id <= ins; + request_type_output <= 0b0101; + request_ROB_index <= static_cast(LSQ_queue[head].ins_ROB_index); + request_address_output <= + (static_cast(LSQ_queue[head].V1) + static_cast(LSQ_queue[head].ins_imm)); + } else if (ins == 0b00000100011) { + // sb + mem_request_full_ins_id <= ins; + request_type_output <= 0b0010; + request_ROB_index <= static_cast(LSQ_queue[head].ins_ROB_index); + request_address_output <= + (static_cast(LSQ_queue[head].V1) + static_cast(LSQ_queue[head].ins_imm)); + request_data_output <= (static_cast(LSQ_queue[head].V2) & 0xFF); + } else if (ins == 0b00010100011) { + // sh + mem_request_full_ins_id <= ins; + request_type_output <= 0b0110; + request_ROB_index <= static_cast(LSQ_queue[head].ins_ROB_index); + request_address_output <= + (static_cast(LSQ_queue[head].V1) + static_cast(LSQ_queue[head].ins_imm)); + request_data_output <= (static_cast(LSQ_queue[head].V2) & 0xFFFF); + } else if (ins == 0b00100100011) { + // sw + mem_request_full_ins_id <= ins; + request_type_output <= 0b1010; + request_ROB_index <= static_cast(LSQ_queue[head].ins_ROB_index); + request_address_output <= + (static_cast(LSQ_queue[head].V1) + static_cast(LSQ_queue[head].ins_imm)); + request_data_output <= static_cast(LSQ_queue[head].V2); + } else { + throw std::runtime_error("Invalid instruction"); + } + } + } + } + } + if (!can_execute) request_type_output <= 0; + LSQ_remain_space <= next_remain_space; + LSQ_remain_space_output <= next_remain_space; } }; } // namespace ZYM diff --git a/include/memory.h b/include/memory.h index ed320bf..7eb5665 100644 --- a/include/memory.h +++ b/include/memory.h @@ -9,12 +9,13 @@ using dark::max_size_t; namespace ZYM { struct Memory_Input { + dark::Wire<1> reset; + dark::Wire<1> force_clear_receiver; + dark::Wire<7 + 3 + 1> full_ins_id; dark::Wire<4> request_type_input; dark::Wire<32> address_input; dark::Wire<32> data_input; dark::Wire<5> request_ROB_index; - dark::Wire<1> reset; - dark::Wire<1> force_clear_receiver; }; struct Memory_Output { dark::Register<2> data_sign; diff --git a/include/register.h b/include/register.h index aa27df5..ab07c10 100644 --- a/include/register.h +++ b/include/register.h @@ -39,6 +39,10 @@ public: this->_M_assigned = true; this->_M_new = static_cast(value); } + auto peek() const -> max_size_t { // this function should only be used for convinience within the same module + if(this->_M_assigned) return this->_M_new; + return this->_M_old; + } explicit operator max_size_t() const { return this->_M_old; } explicit operator bool() const { return this->_M_old; } diff --git a/src/main.cpp b/src/main.cpp index f9ee329..fb658de 100644 --- a/src/main.cpp +++ b/src/main.cpp @@ -62,6 +62,7 @@ int main(int argc, char **argv) { RWConnect(csu.cache_hit, lsq.cache_hit); RWConnect(csu.cache_hit_ROB_index, lsq.cache_hit_ROB_index); RWConnect(csu.cache_hit_data, lsq.cache_hit_data); + RWConnect(lsq.mem_request_full_ins_id, csu.mem_request_full_ins_id); RWConnect(lsq.request_type_output, csu.mem_request_type_input); RWConnect(lsq.request_ROB_index, csu.mem_request_ROB_index); RWConnect(lsq.request_address_output, csu.mem_address_input); @@ -121,6 +122,7 @@ int main(int argc, char **argv) { RWConnect(memory.data_sign, lsq.mem_data_sign); RWConnect(memory.completed_memins_ROB_index, lsq.completed_memins_ROB_index); RWConnect(memory.completed_memins_read_data, lsq.completed_memins_read_data); + RWConnect(lsq.mem_request_full_ins_id, memory.full_ins_id); RWConnect(lsq.request_type_output, memory.request_type_input); RWConnect(lsq.request_ROB_index, memory.request_ROB_index); RWConnect(lsq.request_address_output, memory.address_input);