From 24b18756e825295b65200adaeb29e93818e5ac60 Mon Sep 17 00:00:00 2001 From: ZhuangYumin Date: Sat, 19 Oct 2024 07:01:21 +0000 Subject: [PATCH] finish live analysis --- include/IR/IR_basic.h | 5 +- include/opt/cfg.h | 104 +++++++++++++++++- include/opt/liveanalysis.h | 4 + include/opt/phieliminate.h | 3 +- include/opt/regalloc.h | 5 +- src/main.cpp | 5 +- src/opt/cfg.cpp | 87 +-------------- src/opt/liveanalysis.cpp | 217 +++++++++++++++++++++++++++++++++++++ src/opt/mem2reg.cpp | 12 +- src/opt/phieliminate.cpp | 17 ++- src/opt/regalloc.cpp | 16 ++- 11 files changed, 367 insertions(+), 108 deletions(-) create mode 100644 include/opt/liveanalysis.h create mode 100644 src/opt/liveanalysis.cpp diff --git a/include/IR/IR_basic.h b/include/IR/IR_basic.h index b40fe62..cdcd7ae 100644 --- a/include/IR/IR_basic.h +++ b/include/IR/IR_basic.h @@ -365,9 +365,7 @@ class SelectItem : public ActionItem { } }; class FunctionDefItem : public LLVMIRItemBase { - friend class IRBuilder; - friend void GenerateNaiveASM(std::ostream &os, std::shared_ptr prog); - friend class CFGType BuildCFGForFunction(const std::shared_ptr &func); + public: LLVMType return_type; std::string func_name_raw; std::vector args; @@ -375,7 +373,6 @@ class FunctionDefItem : public LLVMIRItemBase { std::shared_ptr init_block; std::vector> basic_blocks; - public: FunctionDefItem() = default; void RecursivePrint(std::ostream &os) const { os << "define "; diff --git a/include/opt/cfg.h b/include/opt/cfg.h index fd8918b..55d9c63 100644 --- a/include/opt/cfg.h +++ b/include/opt/cfg.h @@ -17,6 +17,11 @@ class CFGNodeType { CFGNodeType *idom; std::vector successors_in_dom_tree; CFGNodeCollection dom_frontier; + + std::vector in_active_vars; + std::vector out_active_vars; + std::vector use_vars; + std::vector def_vars; }; class CFGType { @@ -27,9 +32,100 @@ class CFGType { std::unordered_map label_to_block; }; -CFGNodeCollection GetCFGNodeCollectionsIntersection(const CFGNodeCollection &a, const CFGNodeCollection &b); 
-CFGNodeCollection GetCFGNodeCollectionsUnion(const CFGNodeCollection &a, const CFGNodeCollection &b); -CFGNodeCollection GetCFGNodeCollectionsDifference(const CFGNodeCollection &a, const CFGNodeCollection &b); -bool CFGNodeCollectionIsSame(const CFGNodeCollection &a, const CFGNodeCollection &b); +template > +Container GetCollectionsIntersection(const Container &a, const Container &b, Compare comp = Compare()) { + Container result; + auto ita = a.begin(); + auto itb = b.begin(); + + while (ita != a.end() && itb != b.end()) { + if (comp(*ita, *itb)) { + ++ita; + } else if (comp(*itb, *ita)) { + ++itb; + } else { + result.push_back(*ita); + ++ita; + ++itb; + } + } + + return result; +} + +template > +Container GetCollectionsUnion(const Container &a, const Container &b, Compare comp = Compare()) { + Container result; + auto ita = a.begin(); + auto itb = b.begin(); + + while (ita != a.end() && itb != b.end()) { + if (comp(*ita, *itb)) { + result.push_back(*ita); + ++ita; + } else if (comp(*itb, *ita)) { + result.push_back(*itb); + ++itb; + } else { + result.push_back(*ita); + ++ita; + ++itb; + } + } + + while (ita != a.end()) { + result.push_back(*ita); + ++ita; + } + + while (itb != b.end()) { + result.push_back(*itb); + ++itb; + } + + return result; +} + +template > +Container GetCollectionsDifference(const Container &a, const Container &b, Compare comp = Compare()) { + Container result; + auto ita = a.begin(); + auto itb = b.begin(); + + while (ita != a.end() && itb != b.end()) { + if (comp(*ita, *itb)) { + result.push_back(*ita); + ++ita; + } else if (comp(*itb, *ita)) { + ++itb; + } else { + ++ita; + ++itb; + } + } + + while (ita != a.end()) { + result.push_back(*ita); + ++ita; + } + + return result; +} + +template > +bool GetCollectionsIsSame(const Container &a, const Container &b, Compare comp = Compare()) { + auto ita = a.begin(); + auto itb = b.begin(); + + while (ita != a.end() && itb != b.end()) { + if (comp(*ita, *itb) || comp(*itb, *ita)) { + return false; 
+ } + ++ita; + ++itb; + } + + return ita == a.end() && itb == b.end(); +} CFGType BuildCFGForFunction(const std::shared_ptr &func); \ No newline at end of file diff --git a/include/opt/liveanalysis.h b/include/opt/liveanalysis.h new file mode 100644 index 0000000..1277896 --- /dev/null +++ b/include/opt/liveanalysis.h @@ -0,0 +1,4 @@ +#pragma once +#include "cfg.h" + +void LiveAnalysis(CFGType &cfg); \ No newline at end of file diff --git a/include/opt/phieliminate.h b/include/opt/phieliminate.h index b66b9e6..3063924 100644 --- a/include/opt/phieliminate.h +++ b/include/opt/phieliminate.h @@ -8,11 +8,10 @@ class MoveInstruct : public ActionItem { std::string src_full; std::string dest_full; MoveInstruct() = default; - void RecursivePrint(std::ostream &os) const { + void RecursivePrint([[maybe_unused]] std::ostream &os) const { throw std::runtime_error("Move instruction is not an actual LLVM IR instruction"); } }; } // namespace opt - std::shared_ptr PhiEliminate(std::shared_ptr src); \ No newline at end of file diff --git a/include/opt/regalloc.h b/include/opt/regalloc.h index 070c2c7..40d8521 100644 --- a/include/opt/regalloc.h +++ b/include/opt/regalloc.h @@ -1,2 +1,5 @@ #pragma once -#include "phieliminate.h" \ No newline at end of file +#include "liveanalysis.h" +#include "phieliminate.h" + +std::shared_ptr RegAlloc(std::shared_ptr src); \ No newline at end of file diff --git a/src/main.cpp b/src/main.cpp index a3571c2..38e8f0f 100644 --- a/src/main.cpp +++ b/src/main.cpp @@ -3,8 +3,8 @@ #include #include "IR/IR.h" #include "naivebackend/naivebackend.h" -#include "semantic/semantic.h" #include "opt/opt.h" +#include "semantic/semantic.h" int main(int argc, char **argv) { argparse::ArgumentParser program("zmxcc"); @@ -46,8 +46,9 @@ int main(int argc, char **argv) { GenerateNaiveASM(fout, IR); } else { auto IR_with_out_allocas = Mem2Reg(IR); - // IR_with_out_allocas->RecursivePrint(fout); + IR_with_out_allocas->RecursivePrint(fout); auto IR_with_out_phis = 
PhiEliminate(IR_with_out_allocas); + auto alloced_code = RegAlloc(IR_with_out_phis); } } catch (const SemanticError &err) { std::cout << err.what() << std::endl; diff --git a/src/opt/cfg.cpp b/src/opt/cfg.cpp index 21e1e46..da64955 100644 --- a/src/opt/cfg.cpp +++ b/src/opt/cfg.cpp @@ -1,95 +1,12 @@ #include "cfg.h" #include -CFGNodeCollection GetCFGNodeCollectionsIntersection(const CFGNodeCollection &a, const CFGNodeCollection &b) { - // assume that thety are both sorted - CFGNodeCollection res; - auto ita = a.begin(); - auto itb = b.begin(); - while (ita != a.end() && itb != b.end()) { - if (*ita == *itb) { - res.push_back(*ita); - ita++; - itb++; - } else if (*ita < *itb) { - ita++; - } else { - itb++; - } - } - return res; -} - -CFGNodeCollection GetCFGNodeCollectionsUnion(const CFGNodeCollection &a, const CFGNodeCollection &b) { - // assume that thety are both sorted - CFGNodeCollection res; - auto ita = a.begin(); - auto itb = b.begin(); - while (ita != a.end() && itb != b.end()) { - if (*ita == *itb) { - res.push_back(*ita); - ita++; - itb++; - } else if (*ita < *itb) { - res.push_back(*ita); - ita++; - } else { - res.push_back(*itb); - itb++; - } - } - while (ita != a.end()) { - res.push_back(*ita); - ita++; - } - while (itb != b.end()) { - res.push_back(*itb); - itb++; - } - return res; -} - -CFGNodeCollection GetCFGNodeCollectionsDifference(const CFGNodeCollection &a, const CFGNodeCollection &b) { - // assume that thety are both sorted - CFGNodeCollection res; - auto ita = a.begin(); - auto itb = b.begin(); - while (ita != a.end() && itb != b.end()) { - if (*ita == *itb) { - ita++; - itb++; - } else if (*ita < *itb) { - res.push_back(*ita); - ita++; - } else { - itb++; - } - } - while (ita != a.end()) { - res.push_back(*ita); - ita++; - } - return res; -} - -bool CFGNodeCollectionIsSame(const CFGNodeCollection &a, const CFGNodeCollection &b) { - auto ita = a.begin(); - auto itb = b.begin(); - while (ita != a.end() && itb != b.end()) { - if (*ita != *itb) 
{ - return false; - } - ita++; - itb++; - } - return ita == a.end() && itb == b.end(); -} CFGType BuildCFGForFunction(const std::shared_ptr &func) { CFGType res; - auto init_block=func->init_block; + auto init_block = func->init_block; if (!func->init_block) { // throw std::runtime_error("Function does not have an init block"); - if(func->basic_blocks.size()==0) throw std::runtime_error("Function does not have any block"); + if (func->basic_blocks.size() == 0) throw std::runtime_error("Function does not have any block"); init_block = func->basic_blocks[0]; } res.label_to_block[init_block->label_full] = init_block.get(); diff --git a/src/opt/liveanalysis.cpp b/src/opt/liveanalysis.cpp new file mode 100644 index 0000000..5af21b7 --- /dev/null +++ b/src/opt/liveanalysis.cpp @@ -0,0 +1,217 @@ +#include "liveanalysis.h" +#include +#include +#include "IR/IR_basic.h" +#include "cfg.h" +#include "tools.h" + +void VarCollect(CFGType &cfg, std::vector &id_to_var, std::unordered_map &var_to_id) { + for (auto node : cfg.nodes) { + auto block = node->corresponding_block; + for (auto act : block->actions) { + if (auto bin_act = std::dynamic_pointer_cast(act)) { + id_to_var.push_back(bin_act->result_full); + var_to_id[bin_act->result_full] = id_to_var.size() - 1; + } else if (auto load_act = std::dynamic_pointer_cast(act)) { + id_to_var.push_back(load_act->result_full); + var_to_id[load_act->result_full] = id_to_var.size() - 1; + } else if (auto get_act = std::dynamic_pointer_cast(act)) { + id_to_var.push_back(get_act->result_full); + var_to_id[get_act->result_full] = id_to_var.size() - 1; + } else if (auto icmp_act = std::dynamic_pointer_cast(act)) { + id_to_var.push_back(icmp_act->result_full); + var_to_id[icmp_act->result_full] = id_to_var.size() - 1; + } else if (auto call_act = std::dynamic_pointer_cast(act)) { + if (!std::holds_alternative(call_act->return_type)) { + id_to_var.push_back(call_act->result_full); + var_to_id[call_act->result_full] = id_to_var.size() - 1; + } + 
} else if (auto select_act = std::dynamic_pointer_cast(act)) { + id_to_var.push_back(select_act->result_full); + var_to_id[select_act->result_full] = id_to_var.size() - 1; + } + } + } +} + +void UseDefCollect(CFGType &cfg, [[maybe_unused]] std::vector &id_to_var, + std::unordered_map &var_to_id) { + for (auto node : cfg.nodes) { + auto block = node->corresponding_block; + std::vector cur_node_use; + std::vector cur_node_def; + bool use_def_init = false; + for (auto act : block->actions) { + std::vector cur_act_use; + std::vector cur_act_def; + if (auto br_act = std::dynamic_pointer_cast(act)) { + if (var_to_id.find(br_act->cond) != var_to_id.end()) { + cur_act_use.push_back(var_to_id[br_act->cond]); + } + } else if (auto ret_act = std::dynamic_pointer_cast(act)) { + if (!std::holds_alternative(ret_act->type) && var_to_id.find(ret_act->value) != var_to_id.end()) { + cur_act_use.push_back(var_to_id[ret_act->value]); + } + } else if (auto bin_act = std::dynamic_pointer_cast(act)) { + if (var_to_id.find(bin_act->operand1_full) != var_to_id.end()) { + cur_act_use.push_back(var_to_id[bin_act->operand1_full]); + } + if (bin_act->operand2_full != bin_act->operand1_full && + var_to_id.find(bin_act->operand2_full) != var_to_id.end()) { + cur_act_use.push_back(var_to_id[bin_act->operand2_full]); + } + if (var_to_id.find(bin_act->result_full) != var_to_id.end()) { + cur_act_def.push_back(var_to_id[bin_act->result_full]); + } + } else if (auto load_act = std::dynamic_pointer_cast(act)) { + if (var_to_id.find(load_act->ptr_full) != var_to_id.end()) { + cur_act_use.push_back(var_to_id[load_act->ptr_full]); + } + if (var_to_id.find(load_act->result_full) != var_to_id.end()) { + cur_act_def.push_back(var_to_id[load_act->result_full]); + } + } else if (auto store_act = std::dynamic_pointer_cast(act)) { + if (var_to_id.find(store_act->value_full) != var_to_id.end()) { + cur_act_use.push_back(var_to_id[store_act->value_full]); + } + if (store_act->ptr_full == store_act->value_full) + 
throw std::runtime_error("store action should not have the same ptr and value"); + if (var_to_id.find(store_act->ptr_full) != var_to_id.end()) { + cur_act_use.push_back(var_to_id[store_act->ptr_full]); + } + } else if (auto get_act = std::dynamic_pointer_cast(act)) { + std::unordered_set used_vars; + if (var_to_id.find(get_act->ptr_full) != var_to_id.end()) { + cur_act_use.push_back(var_to_id[get_act->ptr_full]); + used_vars.insert(get_act->ptr_full); + } + for (auto idx : get_act->indices) { + if (used_vars.find(idx) != used_vars.end()) continue; + if (var_to_id.find(idx) != var_to_id.end()) { + cur_act_use.push_back(var_to_id[idx]); + used_vars.insert(idx); + } + } + if (var_to_id.find(get_act->result_full) != var_to_id.end()) { + cur_act_def.push_back(var_to_id[get_act->result_full]); + } + } else if (auto icmp_act = std::dynamic_pointer_cast(act)) { + if (var_to_id.find(icmp_act->operand1_full) != var_to_id.end()) { + cur_act_use.push_back(var_to_id[icmp_act->operand1_full]); + } + if (icmp_act->operand2_full != icmp_act->operand1_full && + var_to_id.find(icmp_act->operand2_full) != var_to_id.end()) { + cur_act_use.push_back(var_to_id[icmp_act->operand2_full]); + } + if (var_to_id.find(icmp_act->result_full) != var_to_id.end()) { + cur_act_def.push_back(var_to_id[icmp_act->result_full]); + } + } else if (auto call_act = std::dynamic_pointer_cast(act)) { + std::unordered_set used_vars; + for (auto arg : call_act->args_val_full) { + if (used_vars.find(arg) != used_vars.end()) continue; + if (var_to_id.find(arg) != var_to_id.end()) { + cur_act_use.push_back(var_to_id[arg]); + used_vars.insert(arg); + } + } + if (!std::holds_alternative(call_act->return_type) && + var_to_id.find(call_act->result_full) != var_to_id.end()) { + cur_act_def.push_back(var_to_id[call_act->result_full]); + } + } else if (auto select_act = std::dynamic_pointer_cast(act)) { + if (var_to_id.find(select_act->cond_full) != var_to_id.end()) { + 
cur_act_use.push_back(var_to_id[select_act->cond_full]); + } + if (select_act->true_val_full != select_act->cond_full && + var_to_id.find(select_act->true_val_full) != var_to_id.end()) { + cur_act_use.push_back(var_to_id[select_act->true_val_full]); + } + if (select_act->false_val_full != select_act->cond_full && + select_act->false_val_full != select_act->true_val_full && /* skip if already counted as true_val */ + var_to_id.find(select_act->false_val_full) != var_to_id.end()) { + cur_act_use.push_back(var_to_id[select_act->false_val_full]); + } + if (var_to_id.find(select_act->result_full) != var_to_id.end()) { + cur_act_def.push_back(var_to_id[select_act->result_full]); + } + } + std::sort(cur_act_use.begin(), cur_act_use.end()); + std::sort(cur_act_def.begin(), cur_act_def.end()); + for (size_t i = 1; i < cur_act_use.size(); i++) { + if (cur_act_use[i] == cur_act_use[i - 1]) { + throw std::runtime_error("use variable appears twice in one action"); + } + } + for (size_t i = 1; i < cur_act_def.size(); i++) { + if (cur_act_def[i] == cur_act_def[i - 1]) { + throw std::runtime_error("def variable appears twice in one action"); + } + } + if (!use_def_init) { + use_def_init = true; + cur_node_use = cur_act_use; + cur_node_def = cur_act_def; + } else { + auto use_p = std::move(cur_node_use); + auto def_p = std::move(cur_node_def); + auto use_n = std::move(cur_act_use); + auto def_n = std::move(cur_act_def); + cur_node_use = GetCollectionsUnion(use_p, GetCollectionsDifference(use_n, def_p)); + cur_node_def = GetCollectionsUnion(def_p, def_n); /* block defs = earlier defs + this action's defs (not its uses) */ + } + } + node->use_vars = cur_node_use; + node->def_vars = cur_node_def; + } +} + +void LiveAnalysis(CFGType &cfg) { + std::vector id_to_var; + std::unordered_map var_to_id; + VarCollect(cfg, id_to_var, var_to_id); + UseDefCollect(cfg, id_to_var, var_to_id); + std::vector exists; + for (auto node : cfg.nodes) { + node->in_active_vars = node->use_vars; + if (node->successors.size() == 0) { + exists.push_back(node.get()); + } + } + bool all_data_unchanged; + do { + 
all_data_unchanged = true; + for (auto node : cfg.nodes) { + node->visited = false; + } + std::queue Q; + for (auto e : exists) { + Q.push(e); + e->visited = true; + } + while (Q.size() > 0) { + auto cur_node = Q.front(); + Q.pop(); + for (auto pred : cur_node->predecessors) { + if (!pred->visited) { + pred->visited = true; + Q.push(pred); + } + } + std::vector out_active_vars; + for (auto succ : cur_node->successors) { + out_active_vars = GetCollectionsUnion(out_active_vars, succ->in_active_vars); + } + if (!GetCollectionsIsSame(cur_node->out_active_vars, out_active_vars)) { + all_data_unchanged = false; + cur_node->out_active_vars = std::move(out_active_vars); + } + std::vector in_active_vars = GetCollectionsUnion( + cur_node->use_vars, GetCollectionsDifference(cur_node->out_active_vars, cur_node->def_vars)); + if (!GetCollectionsIsSame(cur_node->in_active_vars, in_active_vars)) { + all_data_unchanged = false; + cur_node->in_active_vars = std::move(in_active_vars); + } + } + } while (!all_data_unchanged); +} \ No newline at end of file diff --git a/src/opt/mem2reg.cpp b/src/opt/mem2reg.cpp index 468f356..442c9f4 100644 --- a/src/opt/mem2reg.cpp +++ b/src/opt/mem2reg.cpp @@ -36,11 +36,11 @@ void BuildDomForFunction(const std::shared_ptr &func, const CFG if (cur->predecessors.size() > 0) { CFGNodeCollection tmp = cur->predecessors[0]->dom; for (size_t i = 1; i < cur->predecessors.size(); i++) { - tmp = GetCFGNodeCollectionsIntersection(tmp, cur->predecessors[i]->dom); + tmp = GetCollectionsIntersection(tmp, cur->predecessors[i]->dom); } - new_dom = GetCFGNodeCollectionsUnion(new_dom, tmp); + new_dom = GetCollectionsUnion(new_dom, tmp); } - if (!CFGNodeCollectionIsSame(new_dom, cur->dom)) { + if (!GetCollectionsIsSame(new_dom, cur->dom)) { all_dom_unchanged = false; cur->dom = new_dom; } @@ -59,10 +59,10 @@ void BuildDomForFunction(const std::shared_ptr &func, const CFG for (auto node : cfg.nodes) { CFGNodeCollection is_frontier_of; CFGNodeCollection tmp1 = 
{node.get()}; - tmp1 = GetCFGNodeCollectionsDifference(node->dom, tmp1); + tmp1 = GetCollectionsDifference(node->dom, tmp1); for (auto pred : node->predecessors) { - CFGNodeCollection tmp2 = GetCFGNodeCollectionsDifference(pred->dom, tmp1); - is_frontier_of = GetCFGNodeCollectionsUnion(is_frontier_of, tmp2); + CFGNodeCollection tmp2 = GetCollectionsDifference(pred->dom, tmp1); + is_frontier_of = GetCollectionsUnion(is_frontier_of, tmp2); } for (auto frontier_node : is_frontier_of) { frontier_node->dom_frontier.push_back(node.get()); diff --git a/src/opt/phieliminate.cpp b/src/opt/phieliminate.cpp index 26e7306..1cf9879 100644 --- a/src/opt/phieliminate.cpp +++ b/src/opt/phieliminate.cpp @@ -4,7 +4,7 @@ using namespace opt; -void ConductPhiEliminateForFunction([[maybe_unused]] std::shared_ptr func, CFGType &cfg) { +void ConductPhiEliminateForFunction(std::shared_ptr func, CFGType &cfg) { size_t new_block_cnt = 0; for (auto cur_node : cfg.nodes) { auto cur_block = cur_node->corresponding_block; @@ -22,14 +22,23 @@ void ConductPhiEliminateForFunction([[maybe_unused]] std::shared_ptr src_changed; for (auto [_, phi_act] : cur_block->phi_map) { for (auto [src_val, src_label] : phi_act->values) { auto src_block = cfg.label_to_block[src_label]; auto src_node = cfg.block_to_node[src_block]; - if (src_node->successors.size() > 1 && cur_node->predecessors.size() > 1) { + if (src_changed.find(src_label) != src_changed.end()) { + src_block = src_changed[src_label]; + auto new_move = std::make_shared(); + new_move->src_full = src_val; + new_move->dest_full = phi_act->result_full; + src_block->actions.push_back(new_move); + } else if (src_node->successors.size() > 1 && cur_node->predecessors.size() > 1) { // it is a critical edge, need to insert a new block auto new_block = std::make_shared(); + func->basic_blocks.push_back(new_block); new_block->label_full = cur_block->label_full + ".phieliminate." 
+ std::to_string(new_block_cnt++); + src_changed[src_label] = new_block.get(); new_block->exit_action = std::make_shared(); std::dynamic_pointer_cast(new_block->exit_action)->label_full = cur_block->label_full; auto src_block_exit_action = std::dynamic_pointer_cast(src_block->exit_action); @@ -43,7 +52,9 @@ void ConductPhiEliminateForFunction([[maybe_unused]] std::shared_ptrfalse_label_full = new_block->label_full; } else { throw std::runtime_error( - "something strange happened: src block of a critical edge cannot find the corresponding label"); + "something strange happened: src block of a critical edge cannot find the corresponding label, src " + "block label=" + + src_block->label_full); } auto new_move = std::make_shared(); new_move->src_full = src_val; diff --git a/src/opt/regalloc.cpp b/src/opt/regalloc.cpp index 901c92c..ce1d4e3 100644 --- a/src/opt/regalloc.cpp +++ b/src/opt/regalloc.cpp @@ -1 +1,15 @@ -#include "regalloc.h" \ No newline at end of file +#include "regalloc.h" + +void ConductRegAllocForFunction([[maybe_unused]] std::shared_ptr func, CFGType &cfg) { + LiveAnalysis(cfg); +} + +std::shared_ptr RegAlloc(std::shared_ptr src) { + auto res = src; + for (auto &func : res->function_defs) { + // func = std::make_shared(*func); + auto cfg = BuildCFGForFunction(func); + ConductRegAllocForFunction(func, cfg); + } + return res; +} \ No newline at end of file