代码实现——LLVM IR Printer
文件用于标准化 LLVM IR(Intermediate Representation)中的函数、模块、循环、SCC(Strongly Connected Components),并将处理后的 IR 以标准格式写入指定目录。
它可以用于 优化前后 IR 结构对比,以及 去除调试信息 以提升 IR 可读性和一致性。
目的是制作数据集。
涵盖的功能有:
- 标准化 IR 结构:
- 清除 BasicBlock 和 Instruction 的名称,确保一致性。
- 处理 `Function`、`Module`、`Loop`、`LazyCallGraph::SCC` 等不同 IR 单元。
- 文件存储:
- IR 处理后,存储到指定目录,按照 `模块名-哈希值-时间戳-后缀.ll` 进行命名,方便后续分析。
- 利用 `llvm::sys::fs::create_directories` 自动创建存储目录,避免手动管理。
- 哈希计算:
- 对函数名计算 6 位十六进制哈希(取哈希值的低 24 位),避免冗长的文件名;该哈希只能降低命名冲突的概率,并不能保证唯一性。
- 模块名优化:
- 去掉 LLVM 模块的完整路径,仅保留文件名,避免文件路径污染模块命名。
#ifndef LLVM_TRANSFORMS_UTILS_MYEXTENSION_STANDARDIRPRINTER_H
#define LLVM_TRANSFORMS_UTILS_MYEXTENSION_STANDARDIRPRINTER_H

#include "llvm/Analysis/LazyCallGraph.h"
#include "llvm/Analysis/LoopInfo.h"
#include "llvm/CodeGen/MachineFunction.h"
#include "llvm/IR/BasicBlock.h"
#include "llvm/IR/Function.h"
#include "llvm/IR/Instruction.h"
#include "llvm/IR/Module.h"
#include "llvm/Support/FileSystem.h"
#include "llvm/Support/Path.h" // Path helpers: sys::path::filename / is_separator.
#include "llvm/Support/raw_ostream.h"
#include <chrono>
#include <ctime>
#include <functional>
#include <iomanip>
#include <sstream>
#include <string>
#include <system_error>
#include <type_traits>

// Kept so that existing includers relying on unqualified LLVM names still build.
using namespace llvm;

/// Clears the name of every basic block and every instruction in \p F so that
/// the printed IR uses numbered values only.
inline void standardizeFunction(Function &F) {
  for (BasicBlock &BB : F) {
    BB.setName("");
    for (Instruction &I : BB)
      I.setName("");
  }
}

/// Applies standardizeFunction to every function in \p M.
inline void standardizeModule(Module &M) {
  for (Function &F : M)
    standardizeFunction(F);
}

/// Clears block and instruction names for the blocks that belong to \p L.
/// Blocks of the enclosing function that are outside the loop are untouched.
inline void standardizeLoop(Loop &L) {
  for (BasicBlock *BB : L.blocks()) {
    BB->setName("");
    for (Instruction &I : *BB)
      I.setName("");
  }
}

/// Applies standardizeFunction to the function of every node in \p S.
inline void standardizeSCC(LazyCallGraph::SCC &S) {
  for (LazyCallGraph::Node &N : S)
    standardizeFunction(N.getFunction());
}

/// Returns the low 24 bits of std::hash<std::string>(Name) as six lowercase,
/// zero-padded hexadecimal digits.
///
/// NOTE: std::hash is only required to be consistent within one program
/// execution, so the digest may differ between standard-library
/// implementations; 24 bits also cannot rule out collisions.
inline std::string get6HexHash(const std::string &Name) {
  const std::size_t Digest = std::hash<std::string>{}(Name);
  std::stringstream Hex;
  Hex << std::hex << std::setw(6) << std::setfill('0') << (Digest & 0xFFFFFF);
  return Hex.str();
}

/// Strips value names from \p IR and writes its textual form to
/// `<OutputBaseDir>/<module>/<module>-<funchash>-<ShortTimestamp>-<Suffix>.ll`.
///
/// Supported unit types are Module, Function, Loop and LazyCallGraph::SCC;
/// any other type only produces a diagnostic on errs(). The IR is modified in
/// place (names are cleared) before it is printed.
///
/// \param IR             The IR unit to standardize and dump.
/// \param Suffix         Trailing file-name component (e.g. "pass-before").
/// \param ShortTimestamp Timestamp component of the file name.
/// \param OutputBaseDir  Root dump directory; a trailing path separator is
///                       added when missing. The per-module directory is
///                       created on demand.
///
/// All failures (directory creation, open, write) are reported on errs() and
/// the function returns without throwing or aborting.
template <typename IRUnitT>
void standardizeAndWriteIR(IRUnitT &IR, const std::string &Suffix,
                           const std::string &ShortTimestamp,
                           const std::string &OutputBaseDir =
                               "/home/yz/clean/DBGMASTER/IR_DUMP_DIR/") {
  std::string ModuleName = "UnknownModule";
  std::string FuncHash = "000000";

  // Derive the module name / function hash and strip value names.
  if constexpr (std::is_same_v<IRUnitT, Module>) {
    ModuleName = IR.getName().str();
    if (ModuleName.empty())
      ModuleName = "UnknownModule";
    else
      ModuleName = sys::path::filename(ModuleName).str(); // File name only.
    standardizeModule(IR);
  } else if constexpr (std::is_same_v<IRUnitT, Function>) {
    Module *M = IR.getParent();
    if (M && !M->getName().empty())
      ModuleName = sys::path::filename(M->getName()).str();
    std::string FuncName = IR.getName().str();
    if (FuncName.empty())
      FuncName = "UnknownFunc";
    FuncHash = get6HexHash(FuncName);
    standardizeFunction(IR);
  } else if constexpr (std::is_same_v<IRUnitT, Loop>) {
    Function *F = IR.getHeader() ? IR.getHeader()->getParent() : nullptr;
    if (F) {
      Module *M = F->getParent();
      if (M && !M->getName().empty())
        ModuleName = sys::path::filename(M->getName()).str();
      std::string FuncName = F->getName().str();
      if (!FuncName.empty())
        FuncHash = get6HexHash(FuncName);
    }
    standardizeLoop(IR);
  } else if constexpr (std::is_same_v<IRUnitT, LazyCallGraph::SCC>) {
    ModuleName = "SCC";
    standardizeSCC(IR);
  } else {
    errs() << "Unknown IR type in standardizeAndWriteIR, skipping...\n";
    return;
  }

  // Per-module output directory: <OutputBaseDir>/<ModuleName>/.
  // Tolerate a base directory passed without a trailing separator.
  std::string ModuleOutputDir = OutputBaseDir;
  if (!ModuleOutputDir.empty() &&
      !sys::path::is_separator(ModuleOutputDir.back()))
    ModuleOutputDir += '/';
  ModuleOutputDir += ModuleName;
  ModuleOutputDir += '/';
  if (std::error_code EC = sys::fs::create_directories(ModuleOutputDir)) {
    errs() << "Error: Failed to create directory: " << ModuleOutputDir << " ("
           << EC.message() << ")\n";
    return;
  }

  const std::string OutputFilename = ModuleOutputDir + ModuleName + "-" +
                                     FuncHash + "-" + ShortTimestamp + "-" +
                                     Suffix + ".ll";

  std::error_code EC;
  raw_fd_ostream OutFile(OutputFilename, EC, sys::fs::OF_None);
  if (EC) {
    errs() << "Failed to write IR to file: " << OutputFilename << " ("
           << EC.message() << ")\n";
    return;
  }

  if constexpr (std::is_same_v<IRUnitT, Module> ||
                std::is_same_v<IRUnitT, Function>) {
    IR.print(OutFile, nullptr);
  } else if constexpr (std::is_same_v<IRUnitT, Loop>) {
    // A loop has no printer producing plain IR text; emit its blocks in order.
    for (BasicBlock *BB : IR.blocks())
      BB->print(OutFile, nullptr);
  } else if constexpr (std::is_same_v<IRUnitT, LazyCallGraph::SCC>) {
    // Emit every function that is a member of the SCC.
    for (LazyCallGraph::Node &N : IR)
      N.getFunction().print(OutFile, nullptr);
  }

  // raw_fd_ostream aborts in its destructor when an error is left unhandled;
  // report it here and clear it so a failed dump never kills the compiler.
  OutFile.flush();
  if (OutFile.has_error()) {
    errs() << "Failed to write IR to file: " << OutputFilename << " ("
           << OutFile.error().message() << ")\n";
    OutFile.clear_error();
  }
}

#endif // LLVM_TRANSFORMS_UTILS_MYEXTENSION_STANDARDIRPRINTER_H
- 去掉 LLVM 模块的完整路径,仅保留文件名,避免文件路径污染模块命名。
可以直接使用下面这个 patch(目前仅针对 SimplifyCFG):
diff --git a/llvm/include/llvm/Transforms/Utils/MyExtension/StandardIRPrinter.h b/llvm/include/llvm/Transforms/Utils/MyExtension/StandardIRPrinter.h
new file mode 100644
index 000000000000..29de766eac9f
--- /dev/null
+++ b/llvm/include/llvm/Transforms/Utils/MyExtension/StandardIRPrinter.h
@@ -0,0 +1,129 @@
+#include "llvm/Analysis/LazyCallGraph.h"
+#include "llvm/Analysis/LoopInfo.h"
+#include "llvm/CodeGen/MachineFunction.h"
+#include "llvm/IR/BasicBlock.h"
+#include "llvm/IR/Function.h"
+#include "llvm/IR/Instruction.h"
+#include "llvm/IR/Module.h"
+#include "llvm/Support/FileSystem.h"
+#include "llvm/Support/Path.h" // 关键修改:引入 LLVM 的路径处理库
+#include "llvm/Support/raw_ostream.h"
+#include <chrono>
+#include <ctime>
+#include <functional>
+#include <iomanip>
+#include <sstream>
+
+using namespace llvm;
+
+#define PRIOR_PRINTER(Any, PASS_NAME, TIMESTAMP_VAR) \
+ auto Now = std::chrono::system_clock::now(); \
+ auto TIMESTAMP_VAR = std::chrono::duration_cast<std::chrono::nanoseconds>(Now.time_since_epoch()).count(); \
+ std::string TIMESTAMP_VAR##Str = std::to_string(TIMESTAMP_VAR); \
+ std::string TIMESTAMP_VAR##Short = TIMESTAMP_VAR##Str.substr(TIMESTAMP_VAR##Str.length() - 8); \
+ LLVM_DEBUG(dbgs() << "TIMENOW-" << TIMESTAMP_VAR##Short << "\n"); \
+ standardizeAndWriteIR(Any, PASS_NAME "-before", TIMESTAMP_VAR##Short);
+
+#define POST_PRINTER(Any, PASS_NAME, TIMESTAMP_VAR) \
+ standardizeAndWriteIR(Any, PASS_NAME "-after", TIMESTAMP_VAR##Short);
+
+static void standardizeFunction(Function &F) {
+ for (BasicBlock &BB : F) {
+ BB.setName("");
+ for (Instruction &I : BB) {
+ I.setName("");
+ }
+ }
+}
+
+static void standardizeModule(Module &M) {
+ for (Function &F : M)
+ standardizeFunction(F);
+}
+
+static void standardizeLoop(Loop &L) {
+ for (auto *BB : L.blocks()) {
+ BB->setName("");
+ for (Instruction &I : *BB)
+ I.setName("");
+ }
+}
+
+static void standardizeSCC(LazyCallGraph::SCC &S) {
+ for (auto &N : S)
+ standardizeFunction(N.getFunction());
+}
+
+static std::string get6HexHash(const std::string &Name) {
+ std::hash<std::string> H;
+ std::size_t V = H(Name);
+ std::stringstream S;
+ S << std::hex << std::setw(6) << std::setfill('0') << (V & 0xFFFFFF);
+ return S.str();
+}
+
+template <typename IRUnitT>
+void standardizeAndWriteIR(IRUnitT &IR, const std::string &Suffix, const std::string &ShortTimestamp,
+ const std::string &OutputBaseDir = "/home/yz/clean/DBGMASTER/IR_DUMP_DIR/") {
+ std::string ModuleName = "UnknownModule";
+ std::string FuncHash = "000000";
+
+ // 获取模块名称
+ if constexpr (std::is_same_v<IRUnitT, Module>) {
+ ModuleName = IR.getName().str();
+ if (ModuleName.empty())
+ ModuleName = "UnknownModule";
+ else
+ ModuleName = sys::path::filename(ModuleName).str(); // 仅获取文件名
+ standardizeModule(IR);
+ } else if constexpr (std::is_same_v<IRUnitT, Function>) {
+ Module *M = IR.getParent();
+ if (M && !M->getName().empty())
+ ModuleName = sys::path::filename(M->getName()).str();
+ std::string FuncName = IR.getName().str();
+ if (FuncName.empty())
+ FuncName = "UnknownFunc";
+ FuncHash = get6HexHash(FuncName);
+ standardizeFunction(IR);
+ } else if constexpr (std::is_same_v<IRUnitT, Loop>) {
+ Function *F = IR.getHeader() ? IR.getHeader()->getParent() : nullptr;
+ if (F) {
+ Module *M = F->getParent();
+ if (M && !M->getName().empty())
+ ModuleName = sys::path::filename(M->getName()).str();
+ std::string FuncName = F->getName().str();
+ if (!FuncName.empty())
+ FuncHash = get6HexHash(FuncName);
+ }
+ standardizeLoop(IR);
+ } else if constexpr (std::is_same_v<IRUnitT, LazyCallGraph::SCC>) {
+ ModuleName = "SCC";
+ standardizeSCC(IR);
+ } else {
+ errs() << "Unknown IR type in standardizeAndWriteIR, skipping...\n";
+ return;
+ }
+
+ // 计算最终输出目录(IR_DUMP_DIR/module_name/)
+ std::string ModuleOutputDir = OutputBaseDir + ModuleName + "/";
+ if (std::error_code EC = sys::fs::create_directories(ModuleOutputDir)) {
+ errs() << "Error: Failed to create directory: " << ModuleOutputDir << " (" << EC.message() << ")\n";
+ return;
+ }
+
+ // 计算输出文件名
+ std::string OutputFilename = ModuleOutputDir + ModuleName + "-" + FuncHash + "-" + ShortTimestamp + "-" + Suffix + ".ll";
+
+ // 打开文件进行写入
+ std::error_code EC;
+ raw_fd_ostream OutFile(OutputFilename, EC, sys::fs::OF_None);
+ if (!EC) {
+ if constexpr (std::is_same_v<IRUnitT, Module> || std::is_same_v<IRUnitT, Function>) {
+ IR.print(OutFile, nullptr);
+ } else {
+ errs() << "Unsupported IR type for printing.\n";
+ }
+ } else {
+ errs() << "Failed to write IR to file: " << OutputFilename << " (" << EC.message() << ")\n";
+ }
+}
diff --git a/llvm/lib/Transforms/Scalar/SimplifyCFGPass.cpp b/llvm/lib/Transforms/Scalar/SimplifyCFGPass.cpp
index 4e437e9abeb4..348c30934357 100644
--- a/llvm/lib/Transforms/Scalar/SimplifyCFGPass.cpp
+++ b/llvm/lib/Transforms/Scalar/SimplifyCFGPass.cpp
@@ -42,6 +42,7 @@
#include "llvm/Transforms/Scalar/SimplifyCFG.h"
#include "llvm/Transforms/Utils/Local.h"
#include "llvm/Transforms/Utils/SimplifyCFGOptions.h"
+#include "llvm/Transforms/Utils/MyExtension/StandardIRPrinter.h"
#include <utility>
using namespace llvm;
@@ -373,6 +374,7 @@ void SimplifyCFGPass::printPipeline(
PreservedAnalyses SimplifyCFGPass::run(Function &F,
FunctionAnalysisManager &AM) {
+ PRIOR_PRINTER(F, "simplifycfg", Timestamp);
auto &TTI = AM.getResult<TargetIRAnalysis>(F);
Options.AC = &AM.getResult<AssumptionAnalysis>(F);
DominatorTree *DT = nullptr;
@@ -383,6 +385,7 @@ PreservedAnalyses SimplifyCFGPass::run(Function &F,
PreservedAnalyses PA;
if (RequireAndPreserveDomTree)
PA.preserve<DominatorTreeAnalysis>();
+ POST_PRINTER(F, "simplifycfg", Timestamp);
return PA;
}
评论