diff --git a/include/IR/IR_basic.h b/include/IR/IR_basic.h index caf3f46..5c52d78 100644 --- a/include/IR/IR_basic.h +++ b/include/IR/IR_basic.h @@ -84,6 +84,7 @@ class BRAction : public JMPActionItem { NaiveBackend::FuncLayout &layout, const std::unordered_map &low_level_class_info, bool process_phi); + friend class CFGType BuildCFGForFunction(const std::shared_ptr &func); std::string cond; std::string true_label_full; std::string false_label_full; @@ -102,6 +103,7 @@ class UNConditionJMPAction : public JMPActionItem { NaiveBackend::FuncLayout &layout, const std::unordered_map &low_level_class_info, bool process_phi); + friend class CFGType BuildCFGForFunction(const std::shared_ptr &func); std::string label_full; public: @@ -308,6 +310,7 @@ class BlockItem : public LLVMIRItemBase { friend void GenerateNaiveASM(std::ostream &os, std::shared_ptr prog); friend void NaiveBackend::ScanForVar(class NaiveBackend::FuncLayout &layout, std::shared_ptr action, const std::unordered_map &low_level_class_info); + friend class CFGType BuildCFGForFunction(const std::shared_ptr &func); std::string label_full; std::vector> actions; std::shared_ptr exit_action; @@ -447,6 +450,7 @@ class SelectItem : public ActionItem { class FunctionDefItem : public LLVMIRItemBase { friend class IRBuilder; friend void GenerateNaiveASM(std::ostream &os, std::shared_ptr prog); + friend class CFGType BuildCFGForFunction(const std::shared_ptr &func); LLVMType return_type; std::string func_name_raw; std::vector args; @@ -566,6 +570,7 @@ class ModuleItem : public LLVMIRItemBase { friend class IRBuilder; friend std::shared_ptr BuildIR(std::shared_ptr src); friend void GenerateNaiveASM(std::ostream &os, std::shared_ptr prog); + friend std::shared_ptr Mem2Reg(std::shared_ptr src); std::vector> const_strs; std::vector> function_declares; std::vector> type_defs;
diff --git a/include/opt/cfg.h b/include/opt/cfg.h
new file mode 100644
index 0000000..430025f
--- /dev/null
+++ b/include/opt/cfg.h
@@ -0,0 +1,52 @@
+#pragma once
+#include <list>
+#include <memory>
+#include <string>
+#include <unordered_map>
+#include <vector>
+#include "IR/IR_basic.h"
+
+class CFGNodeType;
+
+/// A set of CFG nodes kept in a list. The set helpers declared below expect
+/// their operands to be sorted in ascending order of node address.
+using CFGNodeCollection = std::list<CFGNodeType *>;
+
+/// One vertex of a function's control-flow graph, standing for one basic block.
+class CFGNodeType {
+ public:
+  /// Nodes control can flow to from this one, and nodes it can arrive from (non-owning).
+  std::vector<CFGNodeType *> successors, predecessors;
+  /// The IR block this node stands for (non-owning).
+  BlockItem *corresponding_block = nullptr;
+  // NOTE(review): BuildCFGForFunction leaves the four dominance-related fields
+  // below untouched; presumably a later dominator analysis fills them in -- confirm.
+  CFGNodeCollection dom;
+  CFGNodeType *idom = nullptr;
+  std::vector<CFGNodeType *> successors_in_dom_tree;
+  CFGNodeCollection dom_frontier;
+};
+
+/// Control-flow graph of a single function.
+class CFGType {
+ public:
+  /// Owns the nodes; every raw CFGNodeType pointer in the graph refers to one of these.
+  std::vector<std::shared_ptr<CFGNodeType>> nodes;
+  /// Node created for the function's init block.
+  CFGNodeType *entry = nullptr;
+  /// Lookup tables: basic block -> its node, and full label -> basic block.
+  std::unordered_map<BlockItem *, CFGNodeType *> block_to_node;
+  std::unordered_map<std::string, BlockItem *> label_to_block;
+};
+
+/// Returns the nodes that appear in both a and b.
+CFGNodeCollection GetCFGNodeCollectionsIntersection(const CFGNodeCollection &a, const CFGNodeCollection &b);
+/// Returns the nodes that appear in a, in b, or in both, each listed once.
+CFGNodeCollection GetCFGNodeCollectionsUnion(const CFGNodeCollection &a, const CFGNodeCollection &b);
+/// Returns the nodes of a that do not appear in b.
+CFGNodeCollection GetCFGNodeCollectionsDifference(const CFGNodeCollection &a, const CFGNodeCollection &b);
+
+/// Builds the control-flow graph of func from the exit action of each of its blocks.
+/// Throws std::runtime_error if func has no init block or a block has a missing or
+/// unrecognized exit action.
+CFGType BuildCFGForFunction(const std::shared_ptr<FunctionDefItem> &func);
diff --git a/include/opt/mem2reg.h b/include/opt/mem2reg.h
new file mode 100644
index 0000000..d620907
--- /dev/null
+++ b/include/opt/mem2reg.h
@@ -0,0 +1,6 @@
+#pragma once
+#include <memory>
+#include "IR/IR_basic.h"
+
+/// Returns a new ModuleItem copied from src after running the mem2reg pass on each of its functions.
+std::shared_ptr<ModuleItem> Mem2Reg(std::shared_ptr<ModuleItem> src);
diff --git a/include/opt/opt.h b/include/opt/opt.h
new file mode 100644
index 0000000..4f57210
--- /dev/null
+++ b/include/opt/opt.h
@@ -0,0 +1,4 @@
+#pragma once
+// Umbrella header for the optimization passes.
+#include "cfg.h"
+#include "mem2reg.h"
diff --git a/src/CMakeLists.txt b/src/CMakeLists.txt index e90b03b..4fc1ffa 100644 --- a/src/CMakeLists.txt +++ b/src/CMakeLists.txt @@ -2,8 +2,9 @@ add_subdirectory(ast) add_subdirectory(semantic) add_subdirectory(IR) add_subdirectory(naivebackend) +add_subdirectory(opt) add_executable(zmxcc main.cpp) -target_link_libraries(zmxcc semantic argparse IR naivebackend) +target_link_libraries(zmxcc semantic argparse IR naivebackend opt) set_target_properties(zmxcc PROPERTIES RUNTIME_OUTPUT_DIRECTORY "${CMAKE_BINARY_DIR}" ) \ No newline at end of file diff --git a/src/main.cpp 
b/src/main.cpp index 79e47c1..91c5d3a 100644 --- a/src/main.cpp +++ b/src/main.cpp @@ -4,6 +4,7 @@ #include "IR/IR.h" #include "naivebackend/naivebackend.h" #include "semantic/semantic.h" +#include "opt/opt.h" int main(int argc, char **argv) { argparse::ArgumentParser program("zmxcc"); @@ -12,10 +13,9 @@ int main(int argc, char **argv) { program.add_argument("-o", "--output").help("output file path").nargs(1).required(); - program.add_argument("--naive-IR") - .help("output unoptimized LLVM IR code") - .default_value(false) - .implicit_value(true); + program.add_argument("--naive-IR").help("output unoptimized LLVM IR code").default_value(false).implicit_value(true); + + program.add_argument("--optimize-all").help("enable all optimizations").default_value(false).implicit_value(true); try { program.parse_args(argc, argv); @@ -28,6 +28,7 @@ int main(int argc, char **argv) { auto input_file = program.get("input"); auto output_file = program.get("output"); bool output_naive_ir = program.get("--naive-IR"); + bool optimize_all = program.get("--optimize-all"); std::ifstream fin(input_file); std::ofstream fout(output_file); @@ -41,9 +42,12 @@ int main(int argc, char **argv) { IR->RecursivePrint(fout); return 0; } - - IR->RecursivePrint(std::cerr); - GenerateNaiveASM(fout, IR); + if (!optimize_all) { + GenerateNaiveASM(fout, IR); + } else { + auto IR_with_out_allocas = Mem2Reg(IR); + IR_with_out_allocas->RecursivePrint(fout); + } } catch (const SemanticError &err) { std::cout << err.what() << std::endl; return err.GetErrorCode();
diff --git a/src/opt/CMakeLists.txt b/src/opt/CMakeLists.txt
new file mode 100644
index 0000000..846e500
--- /dev/null
+++ b/src/opt/CMakeLists.txt
@@ -0,0 +1,8 @@
+# Static library with the IR-level passes (CFG construction, mem2reg).
+include_directories(${CMAKE_SOURCE_DIR}/include/opt)
+
+file(GLOB OPT_SOURCES "*.cpp")
+add_library(opt STATIC ${OPT_SOURCES})
+# The sources include IR/IR_basic.h and dynamic-cast IR classes, so state the
+# dependency on IR here instead of relying on the executable's link order.
+target_link_libraries(opt PUBLIC ast IR)
diff --git a/src/opt/cfg.cpp b/src/opt/cfg.cpp
new file mode 100644
index
0000000..0450705
--- /dev/null
+++ b/src/opt/cfg.cpp
@@ -0,0 +1,109 @@
+#include "cfg.h"
+
+#include <algorithm>
+#include <functional>
+#include <iterator>
+#include <memory>
+#include <stdexcept>
+#include <string>
+#include <vector>
+
+namespace {
+
+// Strict total order on node addresses. The built-in < is only specified for
+// pointers into the same array or object; std::less is specified for all of them.
+using NodeAddressLess = std::less<CFGNodeType *>;
+
+// Returns the node of the block whose full label is `label`.
+// Throws std::runtime_error if the function has no block with that label.
+CFGNodeType *FindNodeByLabel(const CFGType &cfg, const std::string &label) {
+  const auto block_it = cfg.label_to_block.find(label);
+  if (block_it == cfg.label_to_block.end()) {
+    throw std::runtime_error("Jump target label not found: " + label);
+  }
+  return cfg.block_to_node.at(block_it->second);
+}
+
+// Records the edge from -> to on both of its endpoints.
+void AddEdge(CFGNodeType *from, CFGNodeType *to) {
+  from->successors.push_back(to);
+  to->predecessors.push_back(from);
+}
+
+}  // namespace
+
+/// Returns the nodes present in both a and b.
+/// Both inputs must be sorted by ascending node address; so is the result.
+CFGNodeCollection GetCFGNodeCollectionsIntersection(const CFGNodeCollection &a, const CFGNodeCollection &b) {
+  CFGNodeCollection res;
+  std::set_intersection(a.begin(), a.end(), b.begin(), b.end(), std::back_inserter(res), NodeAddressLess{});
+  return res;
+}
+
+/// Returns the nodes present in a, in b, or in both; a node in both is emitted once.
+/// Both inputs must be sorted by ascending node address; so is the result.
+CFGNodeCollection GetCFGNodeCollectionsUnion(const CFGNodeCollection &a, const CFGNodeCollection &b) {
+  CFGNodeCollection res;
+  std::set_union(a.begin(), a.end(), b.begin(), b.end(), std::back_inserter(res), NodeAddressLess{});
+  return res;
+}
+
+/// Returns the nodes of a that are not present in b.
+/// Both inputs must be sorted by ascending node address; so is the result.
+CFGNodeCollection GetCFGNodeCollectionsDifference(const CFGNodeCollection &a, const CFGNodeCollection &b) {
+  CFGNodeCollection res;
+  std::set_difference(a.begin(), a.end(), b.begin(), b.end(), std::back_inserter(res), NodeAddressLess{});
+  return res;
+}
+
+/// Builds the control-flow graph of func.
+///
+/// One node is created for the init block (it becomes the entry) and one for each
+/// entry of basic_blocks; edges then follow the exit action of every block.
+///
+/// Throws std::runtime_error if func has no init block, if a block has no exit
+/// action or one of an unrecognized kind, or if a jump names an unknown label.
+CFGType BuildCFGForFunction(const std::shared_ptr<FunctionDefItem> &func) {
+  if (!func->init_block) {
+    throw std::runtime_error("Function does not have an init block");
+  }
+  // The init block goes first so that its node ends up at the front of res.nodes.
+  std::vector<BlockItem *> blocks;
+  blocks.reserve(func->basic_blocks.size() + 1);
+  blocks.push_back(func->init_block.get());
+  for (const auto &block_ptr : func->basic_blocks) {
+    blocks.push_back(block_ptr.get());
+  }
+
+  CFGType res;
+  res.nodes.reserve(blocks.size());
+  for (BlockItem *block : blocks) {
+    res.nodes.push_back(std::make_shared<CFGNodeType>());
+    CFGNodeType *node = res.nodes.back().get();
+    node->corresponding_block = block;
+    res.label_to_block[block->label_full] = block;
+    res.block_to_node[block] = node;
+  }
+  res.entry = res.nodes.front().get();
+
+  // Every block is registered by now, so forward jumps resolve as well.
+  for (const auto &node : res.nodes) {
+    const auto &exit_action = node->corresponding_block->exit_action;
+    if (!exit_action) {
+      throw std::runtime_error("Block does not have an exit action");
+    }
+    if (auto br = std::dynamic_pointer_cast<BRAction>(exit_action)) {
+      // Both targets are recorded, even when they name the same block.
+      AddEdge(node.get(), FindNodeByLabel(res, br->true_label_full));
+      AddEdge(node.get(), FindNodeByLabel(res, br->false_label_full));
+    } else if (auto uncond = std::dynamic_pointer_cast<UNConditionJMPAction>(exit_action)) {
+      AddEdge(node.get(), FindNodeByLabel(res, uncond->label_full));
+    } else if (std::dynamic_pointer_cast<RETAction>(exit_action)) {
+      // A return adds no edge.
+      // NOTE(review): the return action's class is assumed to be named RETAction -- confirm.
+    } else {
+      throw std::runtime_error("Unknown exit action");
+    }
+  }
+  return res;
+}
diff --git a/src/opt/mem2reg.cpp b/src/opt/mem2reg.cpp
new file mode 100644
index 0000000..fbc8ca5
--- /dev/null
+++ b/src/opt/mem2reg.cpp
@@ -0,0 +1,25 @@
+#include "mem2reg.h"
+
+#include <memory>
+
+#include "cfg.h"
+
+/// Per-function worker of the pass, given the function and its control-flow graph.
+/// Placeholder: the body is empty, so the function is currently left as it is.
+void ConductMem2RegForFunction(const std::shared_ptr<FunctionDefItem> & /*func*/, const CFGType & /*cfg*/) {}
+
+/// Returns a new ModuleItem copied from src, with every function definition
+/// replaced by its own copy that has been handed to ConductMem2RegForFunction.
+///
+/// NOTE(review): the copies are made with the classes' copy constructors; members
+/// held through shared_ptr (the blocks, for one) presumably stay shared with src,
+/// so mutating them here would also change the input module -- confirm.
+std::shared_ptr<ModuleItem> Mem2Reg(std::shared_ptr<ModuleItem> src) {
+  auto res = std::make_shared<ModuleItem>(*src);
+  for (auto &func : res->function_defs) {
+    func = std::make_shared<FunctionDefItem>(*func);
+    const CFGType cfg = BuildCFGForFunction(func);
+    ConductMem2RegForFunction(func, cfg);
+  }
+  return res;
+}