技术导读——LLVM指令调度
有关指令调度的学术笔记已经有比较多了,这里只列举一些我觉得需要注意的内容,其余的可以直接参考“附录的参考文献”。如果你对下面的概念都熟悉,说明你已经有了初步的基础,反之,你可以去了解前置需要学习的概念。(其中,参考文献一最好)
后续内容主要在于对 LLVM 中的实际内容的解读,而不是以下这些通用概念:
- Throughput
- Latency
- reservation station
- 静态调度 vs 动态调度
- IssueWidth
- 数据型冒险
- 结构型冒险
- 控制型冒险
- 流水线 stall
- RAW, WAR, WAW
- 指令调度与寄存器分配
①SelectionDAGISel
📌 阶段: 指令选择(Instruction Selection)后,在 DAG(Directed Acyclic Graph)上进行调度
📌 主要 Pass: SelectionDAGISel
📌 优化目标: 决定指令的发射顺序,减少流水线停顿,提高吞吐量
可选算法:
| 调度算法 | 说明 |
|---|---|
| list-burr | 最老的 List Scheduling,基于 bottom-up register reduction |
| list-hybrid | 列表调度,平衡延迟和寄存器压力 |
| list-ilp | 列表调度,平衡 ILP 和寄存器压力 |
| linearize | 按 LLVM IR 顺序执行(不调度) |
| fast | 启发式方法,快速调度,牺牲部分优化 |
| default | 目标平台决定,也是默认的 |
②MachineScheduler
`enable-misched` 和 `enable-post-misched` 选项通常默认开启,但是 `post-RA-scheduler` 默认关闭。
源码解读①preRASched
指令调度的配置位于 SelectionDAGISel.cpp:
ScheduleDAGSDNodes *SelectionDAGISel::CreateScheduler() {
return ISHeuristic(this, OptLevel);
}
ScheduleDAGSDNodes *createDefaultScheduler(SelectionDAGISel *IS,
CodeGenOptLevel OptLevel) {
const TargetLowering *TLI = IS->TLI;
const TargetSubtargetInfo &ST = IS->MF->getSubtarget();
// Try first to see if the Target has its own way of selecting a scheduler
if (auto *SchedulerCtor = ST.getDAGScheduler(OptLevel)) {
return SchedulerCtor(IS, OptLevel);
}
if (OptLevel == CodeGenOptLevel::None ||
(ST.enableMachineScheduler() && ST.enableMachineSchedDefaultSched()) ||
TLI->getSchedulingPreference() == Sched::Source)
return createSourceListDAGScheduler(IS, OptLevel);
if (TLI->getSchedulingPreference() == Sched::RegPressure)
return createBURRListDAGScheduler(IS, OptLevel);
if (TLI->getSchedulingPreference() == Sched::Hybrid)
return createHybridListDAGScheduler(IS, OptLevel);
if (TLI->getSchedulingPreference() == Sched::VLIW)
return createVLIWDAGScheduler(IS, OptLevel);
if (TLI->getSchedulingPreference() == Sched::Fast)
return createFastDAGScheduler(IS, OptLevel);
if (TLI->getSchedulingPreference() == Sched::Linearize)
return createDAGLinearizer(IS, OptLevel);
assert(TLI->getSchedulingPreference() == Sched::ILP &&
"Unknown sched type!");
return createILPListDAGScheduler(IS, OptLevel);
}
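如果不加命令行参数选择特定指令调度器,会先进入上面的 createDefaultScheduler;在目标没有通过 getDAGScheduler 指定自己的调度器的前提下,当 OptLevel 为 None、目标同时开启了 enableMachineScheduler() 与 enableMachineSchedDefaultSched()、或调度偏好为 Sched::Source 时,会进入 `createSourceListDAGScheduler`:
ScheduleDAGSDNodes *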
llvm::createSourceListDAGScheduler(SelectionDAGISel *IS,
CodeGenOptLevel OptLevel) {
const TargetSubtargetInfo &STI = IS->MF->getSubtarget();
const TargetInstrInfo *TII = STI.getInstrInfo();
const TargetRegisterInfo *TRI = STI.getRegisterInfo();
SrcRegReductionPriorityQueue *PQ =
new SrcRegReductionPriorityQueue(*IS->MF, false, true, TII, TRI, nullptr);
ScheduleDAGRRList *SD = new ScheduleDAGRRList(*IS->MF, false, PQ, OptLevel);
PQ->setScheduleDAG(SD); # 核心语句
return SD;
}
指令调度的真正执行在:
ScheduleDAGSDNodes *Scheduler = CreateScheduler();
{
NamedRegionTimer T("sched", "Instruction Scheduling", GroupName,
GroupDescription, TimePassesIsEnabled);
Scheduler->Run(CurDAG, FuncInfo->MBB);
}
进一步: ScheduleDAGSDNodes.cpp
/// Run - perform scheduling.
///
void ScheduleDAGSDNodes::Run(SelectionDAG *dag, MachineBasicBlock *bb) {
BB = bb;
DAG = dag;
// Clear the scheduler's SUnit DAG.
ScheduleDAG::clearDAG();
Sequence.clear();
// Invoke the target's selection of scheduler.
Schedule(); # step into
}
下一步,终于有日志提示了:
/// Schedule - Schedule the DAG using list scheduling.
void ScheduleDAGRRList::Schedule() {
LLVM_DEBUG(dbgs() << "********** List Scheduling " << printMBBReference(*BB)
<< " '" << BB->getName() << "' **********\n");
CurCycle = 0;
IssueCount = 0;
MinAvailableCycle =
DisableSchedCycles ? 0 : std::numeric_limits<unsigned>::max();
NumLiveRegs = 0;
// Allocate slots for each physical register, plus one for a special register
// to track the virtual resource of a calling sequence.
LiveRegDefs.reset(new SUnit*[TRI->getNumRegs() + 1]());
LiveRegGens.reset(new SUnit*[TRI->getNumRegs() + 1]());
CallSeqEndForStart.clear();
assert(Interferences.empty() && LRegsMap.empty() && "stale Interferences");
// Build the scheduling graph.
BuildSchedGraph(nullptr);
LLVM_DEBUG(dump());
Topo.MarkDirty();
AvailableQueue->initNodes(SUnits);
HazardRec->Reset();
// Execute the actual scheduling loop.
ListScheduleBottomUp(); # step into
AvailableQueue->releaseState();
LLVM_DEBUG({
dbgs() << "*** Final schedule ***\n";
dumpSchedule();
dbgs() << '\n';
});}
下面可能会有层层嵌套的函数,处理完才开始真正调度每条指令:
/// ListScheduleBottomUp - The main loop of list scheduling for bottom-up
/// schedulers.
void ScheduleDAGRRList::ListScheduleBottomUp() {
// Release any predecessors of the special Exit node.
ReleasePredecessors(&ExitSU); # 释放前驱(前驱已经ready)
// Add root to Available queue.
if (!SUnits.empty()) {
SUnit *RootSU = &SUnits[DAG->getRoot().getNode()->getNodeId()];
assert(RootSU->Succs.empty() && "Graph root shouldn't have successors!");
RootSU->isAvailable = true;
AvailableQueue->push(RootSU); # 加入root节点:调度的起点
}
// While Available queue is not empty, grab the node with the highest
// priority. If it is not ready put it back. Schedule the node.
Sequence.reserve(SUnits.size());
while (!AvailableQueue->empty() || !Interferences.empty()) { # 主流程都在这个循环体内
LLVM_DEBUG(dbgs() << "\nExamining Available:\n";
AvailableQueue->dump(this));
// Pick the best node to schedule taking all constraints into
// consideration.
SUnit *SU = PickNodeToScheduleBottomUp(); # 选择最佳的指令(后面来看选择的策略)
AdvancePastStalls(SU); # 处理流水线停顿
ScheduleNodeBottomUp(SU); # 执行调度、处理各种依赖
while (AvailableQueue->empty() && !PendingQueue.empty()) { # 这一段的含义是假如此时没有任何指令可以调度(全部要等待流水线),那么增加周期到第一个可以调度的指令!
// Advance the cycle to free resources. Skip ahead to the next ready SU.
assert(MinAvailableCycle < std::numeric_limits<unsigned>::max() &&
"MinAvailableCycle uninitialized");
AdvanceToCycle(std::max(CurCycle + 1, MinAvailableCycle));
} }
// Reverse the order if it is bottom up.
std::reverse(Sequence.begin(), Sequence.end()); # 自底向上需要反序
#ifndef NDEBUG
VerifyScheduledSequence(/*isBottomUp=*/true);
#endif
}
那么这一块引入了四个核心函数,我们一起看一下:
- PickNodeToScheduleBottomUp:逻辑较为复杂,简要为:
- 尝试从 AvailableQueue 选择最高优先级的指令 (CurSU),并检查其是否因寄存器冲突而需要延迟 (DelayForLiveRegsBottomUp)。
- 若有寄存器冲突,则将 CurSU 放入 Interferences 队列,并继续尝试下一个候选指令。若所有候选指令都因寄存器冲突而无法调度:
- 尝试回溯 (BacktrackBottomUp) 以重新安排先前的指令,使当前指令可执行。
- 如果回溯失败,则尝试复制 (Duplicate) 相关指令 或 插入额外的寄存器复制指令 (InsertCopiesAndMoveSuccs) 以打破冲突。
- 最终选定一个可以调度的指令并返回 (CurSU),确保指令调度过程不会卡住。
- AdvancePastStalls:这个函数的作用是确保当前调度单元 (SU) 可以被安全调度,即所有依赖的指令都已执行完毕,并且资源无冲突。首先,它推进当前调度周期 (CurCycle) 至 SU 最早可执行的时间点 (ReadyCycle),确保其依赖的指令已完成。然后,它检查 SU 是否存在资源冲突(例如流水线冲突),如果有,则不断增加等待周期 (Stalls),直到冲突消除。最终,它确保 SU 在正确的周期调度,不会因依赖未就绪或资源冲突而出错。
- ScheduleNodeBottomUp: 采用贪心策略进行指令调度,它在每个周期优先调度当前可用、优先级最高的指令,并尽可能释放资源,使后续指令尽早可用。它主要通过寄存器可用性、数据冒险检测、流水线资源等因素来决定何时推进调度周期,以保证指令的最大并行度。==优先级最高,如何来,我们后面再继续讲==
- AdvanceToCycle:这一块理解即可,不多赘述
其中最简单的 linearize 调度方法
其简单逻辑是:
void ScheduleDAGLinearize::Schedule() {
  LLVM_DEBUG(dbgs() << "********** DAG Linearization **********\n");

  SmallVector<SDNode*, 8> Glues;
  unsigned DAGSize = 0;
  for (SDNode &Node : DAG->allnodes()) {
    SDNode *N = &Node;

    // Use node id to record degree.
    unsigned Degree = N->use_size();
    N->setNodeId(Degree);
    unsigned NumVals = N->getNumValues();
    if (NumVals && N->getValueType(NumVals-1) == MVT::Glue &&
        N->hasAnyUseOfValue(NumVals-1)) {
      SDNode *User = findGluedUser(N);
      if (User) {
        Glues.push_back(N);
        GluedMap.insert(std::make_pair(N, User));
      }
    }

    if (N->isMachineOpcode() ||
        (N->getOpcode() != ISD::EntryToken && !isPassiveNode(N)))
      ++DAGSize;
  }

  for (SDNode *Glue : Glues) {
    SDNode *GUser = GluedMap[Glue];
    unsigned Degree = Glue->getNodeId();
    unsigned UDegree = GUser->getNodeId();

    // Glue user must be scheduled together with the glue operand. So other
    // users of the glue operand must be treated as its users.
    SDNode *ImmGUser = Glue->getGluedUser();
    for (const SDNode *U : Glue->uses())
      if (U == ImmGUser)
        --Degree;
    GUser->setNodeId(UDegree + Degree);
    Glue->setNodeId(1);
  }

  Sequence.reserve(DAGSize);
  ScheduleNode(DAG->getRoot().getNode());
}
- 遍历 DAG(Directed Acyclic Graph)中的所有节点,基于 使用关系 (use_size()) 计算调度顺序。
- 处理 Glue 依赖(特殊的 MVT::Glue 连接),保证 Glue 相关的指令必须被一起调度。
- 线性化 DAG,并存入 Sequence,以便后续指令选择和寄存器分配。
源码解读 PostRASched
源码位于 PostRASchedulerList.cpp,默认关闭,核心位于 runOnMachineFunction,逻辑和 preRA 类似:
bool PostRAScheduler::runOnMachineFunction(MachineFunction &Fn) {
  if (skipFunction(Fn.getFunction()))
    return false;

  TII = Fn.getSubtarget().getInstrInfo();
  MachineLoopInfo &MLI = getAnalysis<MachineLoopInfoWrapperPass>().getLI();
  AliasAnalysis *AA = &getAnalysis<AAResultsWrapperPass>().getAAResults();
  TargetPassConfig *PassConfig = &getAnalysis<TargetPassConfig>();

  RegClassInfo.runOnMachineFunction(Fn);

  TargetSubtargetInfo::AntiDepBreakMode AntiDepMode =
      TargetSubtargetInfo::ANTIDEP_NONE;
  SmallVector<const TargetRegisterClass*, 4> CriticalPathRCs;

  // Check that post-RA scheduling is enabled for this target.
  // This may upgrade the AntiDepMode.
  if (!enablePostRAScheduler(Fn.getSubtarget(), PassConfig->getOptLevel(),
                             AntiDepMode, CriticalPathRCs))
    return false;

  // Check for antidep breaking override...
  if (EnableAntiDepBreaking.getPosition() > 0) {
    AntiDepMode = (EnableAntiDepBreaking == "all")
                      ? TargetSubtargetInfo::ANTIDEP_ALL
                      : ((EnableAntiDepBreaking == "critical")
                             ? TargetSubtargetInfo::ANTIDEP_CRITICAL
                             : TargetSubtargetInfo::ANTIDEP_NONE);
  }

  LLVM_DEBUG(dbgs() << "PostRAScheduler\n");

  SchedulePostRATDList Scheduler(Fn, MLI, AA, RegClassInfo, AntiDepMode,
                                 CriticalPathRCs);

  // Loop over all of the basic blocks
  for (auto &MBB : Fn) {
#ifndef NDEBUG
    // If DebugDiv > 0 then only schedule MBB with (ID % DebugDiv) == DebugMod
    if (DebugDiv > 0) {
      static int bbcnt = 0;
      if (bbcnt++ % DebugDiv != DebugMod)
        continue;
      dbgs() << "*** DEBUG scheduling " << Fn.getName() << ":"
             << printMBBReference(MBB) << " ***\n";
    }
#endif

    // Initialize register live-range state for scheduling in this block.
    Scheduler.startBlock(&MBB);

    // Schedule each sequence of instructions not interrupted by a label
    // or anything else that effectively needs to shut down scheduling.
    MachineBasicBlock::iterator Current = MBB.end();
    unsigned Count = MBB.size(), CurrentCount = Count;
    for (MachineBasicBlock::iterator I = Current; I != MBB.begin();) {
      MachineInstr &MI = *std::prev(I);
      --Count;
      // Calls are not scheduling boundaries before register allocation, but
      // post-ra we don't gain anything by scheduling across calls since we
      // don't need to worry about register pressure.
      if (MI.isCall() || TII->isSchedulingBoundary(MI, &MBB, Fn)) {
        Scheduler.enterRegion(&MBB, I, Current, CurrentCount - Count);
        Scheduler.setEndIndex(CurrentCount);
        Scheduler.schedule();
        Scheduler.exitRegion();
        Scheduler.EmitSchedule();
        Current = &MI;
        CurrentCount = Count;
        Scheduler.Observe(MI, CurrentCount);
      }
      I = MI;
      if (MI.isBundle())
        Count -= MI.getBundleSize();
    }
    assert(Count == 0 && "Instruction count mismatch!");
    assert((MBB.begin() == Current || CurrentCount != 0) &&
           "Instruction count mismatch!");
    Scheduler.enterRegion(&MBB, MBB.begin(), Current, CurrentCount);
    Scheduler.setEndIndex(CurrentCount);
    Scheduler.schedule();
    Scheduler.exitRegion();
    Scheduler.EmitSchedule();

    // Clean up register live-range state.
    Scheduler.finishBlock();

    // Update register kills
    Scheduler.fixupKills(MBB);
  }

  return true;
}
源码解读 machine-scheduler(RA 前和 RA 后都在一个文件)
入口位于 bool MachineScheduler::runOnMachineFunction(MachineFunction &mf) {
最深层的选择最优策略在 tryCandidate 里:
将按照以下优先级执行:
- 物理寄存器偏好
  • 目标是让物理寄存器的定义和使用尽量相邻,减少物理寄存器的生命周期,降低寄存器竞争。
- 寄存器压力控制
  • 避免调度会导致寄存器溢出的指令,防止因寄存器不足导致性能下降或溢出到内存(增加额外的 spill/fill 负担)。
- 关键寄存器压力控制
  • 额外关注那些会导致关键寄存器集合(如浮点寄存器组、通用寄存器组)压力过高的指令,避免让关键寄存器成为瓶颈。
- 资源占用控制
  • 避免调度会过度占用执行资源(如 ALU、FPU 或 Load/Store 单元)的指令,确保资源能均衡使用,不会在后续指令调度时出现资源不足的情况。
- 指令延迟优化
  • 优先调度高延迟指令(如乘法、内存访问),使其尽早开始执行,从而减少流水线的等待时间,提高整体吞吐量。
- 指令簇优化
  • 如果两条指令属于同一个优化簇(Cluster,例如连续的 Load/Store 操作),那么会优先调度属于相同簇的指令,以减少乱序执行导致的额外调度开销。
- 弱依赖优化
  • 优先调度依赖较少的指令,让它们更快地完成并释放依赖关系,使更多的指令可以进入可调度状态,提高整体指令吞吐量。
- 避免增加全局寄存器压力
  • 除了局部寄存器压力外,还会检查整个程序区域的寄存器使用情况,避免调度导致寄存器长期占用过高,影响后续调度。
- 回退到指令原始顺序
  • 如果所有的启发式规则都无法区分候选指令的优先级,则按照代码原始顺序调度,以保持代码结构的稳定性。

Bug 发现
我需要研究一下为什么在开启 O1/O2/O3 时有时不能使用 `-mllvm -pre-RA-sched=linearize` 的问题。可能是另一个 Issue。

源码解析
想要在 pre-RA-sched 阶段拿到他是对什么内容进行调度的,可以观察如下源码:
在 `ScheduleDAGSDNodes` 类中有一个 `std::vector<SUnit*> Sequence;`,而 Sequence 正是调度得到的指令序列(调度结果按顺序存放在其中)。
-view-sched-dags
但是哪一个对性能提升最大呢?(有待数据说话)
GPT 看法:
ScheduleDAGSDNodes(影响较小):
- 需要较好地控制 IR 到 MachineInstr 生成的初始顺序,降低寄存器压力。
- 作用范围仅限 SDNode,不考虑 CPU 执行单元的调度。
- 之后 MIScheduler 仍可能会大幅调整顺序。
- 无法优化 Load/Store 指令的内存访问模式,也不会对 Pipeline Stall 进行优化。
MIScheduler(影响更大,重点优化目标):
- 指令并行性(ILP)优化:让 CPU 尽可能同时执行多个指令。
- 避免流水线 Stalls:调整 Load/Store 顺序,优化 Cache 访问。
- 分支预测优化:减少控制依赖带来的损失。
- 寄存器使用优化:减少寄存器重命名冲突。
- 现代超标量 CPU,可大幅减少 Pipeline Stall,提升指令吞吐量。
ScheduleDAGSDNodes 主要是优化 MachineInstr 生成的初始顺序,但后续 MIScheduler 仍可能会覆盖其优化效果。除非你的目标是 特定 ISA(如 VLIW),否则 最终执行效率主要取决于 MIScheduler。
日志分析
我自己简单插了几个桩,打印了一些调度前后的日志。
SelectionDAGISel 阶段的日志
=======Before List Scheduling
SU(0): t8: ch = RET_ReallyLR Register:i32 $w0, t7, t7:1
t7: ch,glue = CopyToReg t4, Register:i32 $w0, t10
# preds left : 2
# succs left : 0
# rdefs left : 0
Latency : 1
Depth : 2
Height : 0
Predecessors:
SU(2): Ord Latency=1 Barrier
SU(1): Data Latency=1
SU(1): t10: i32,ch = CopyFromReg t0, Register:i32 $wzr
# preds left : 0
# succs left : 1
# rdefs left : 1
Latency : 1
Depth : 0
Height : 1
Successors:
SU(0): Data Latency=1
SU(2): t4: ch = lifetime.end<0 to 4> t2, TargetFrameIndex:i64<0>
# preds left : 1
# succs left : 1
# rdefs left : 0
Latency : 1
Depth : 1
Height : 1
Predecessors:
SU(3): Ord Latency=1 Barrier
Successors:
SU(0): Ord Latency=1 Barrier
SU(3): t2: ch = lifetime.end<0 to 4> t0, TargetFrameIndex:i64<1>
# preds left : 0
# succs left : 1
# rdefs left : 0
Latency : 1
Depth : 0
Height : 2
Successors:
SU(2): Ord Latency=1 Barrier
=======Dump Pre List Scheduling
=======Dump List Scheduling
SU(3): t2: ch = lifetime.end<0 to 4> t0, TargetFrameIndex:i64<1>
SU(2): t4: ch = lifetime.end<0 to 4> t2, TargetFrameIndex:i64<0>
SU(1): t10: i32,ch = CopyFromReg t0, Register:i32 $wzr
SU(0): t8: ch = RET_ReallyLR Register:i32 $w0, t7, t7:1
t7: ch,glue = CopyToReg t4, Register:i32 $w0, t10
这一段日志清晰可读,和后面的日志对比一下你们就知道了。毕竟 SDNode(SUnit)在 DAG 里还是相对简化的内容。
我做了一个有意思的实验,把 ScheduleDAGRRList 里的std::reverse(Sequence.begin(), Sequence.end()); 注释掉了,结果断言错误了:Assertion failed: (I != VRBaseMap.end() && "Node emitted out of order - late"), function getVR, file InstrEmitter.cpp, line 284.
这个断言发生在调度之后的一个步骤(InstrEmitter.cpp 的 getVR)。它可能是在为后续寄存器分配做准备、给节点分配虚拟寄存器时触发的错误:
- VRBaseMap 中没有 Op 的映射,即:
- InstrEmitter::EmitMachineNode() 可能还没给 Op 分配 VReg。
- Op 可能本应该更早出现,但由于 Sequence 调度顺序问题,导致 Op 还没被 EmitMachineNode() 处理。
ScheduleDAGMI
下面这就比较逆天了, 因为是 MIR,里面可以说啥信息都有,都附加在了指令上面,所以非常冗长
========= Before MSched
********** INTERVALS **********
WSP [16r,16d:23)[64r,64d:22)[80r,80d:21)[96r,96d:20)[224r,224d:19)[240r,240d:18)[688r,688d:17)[832r,832d:16)[848r,848d:15)[944r,944d:14)[1056r,1056d:13)[1072r,1072d:12)[1168r,1168d:11)[1280r,1280d:10)[1296r,1296d:9)[1408r,1408d:8)[1536r,1536d:7)[1552r,1552d:6)[1648r,1648d:5)[1760r,1760d:4)[1776r,1776d:3)[1824r,1824d:2)[1872r,1872d:1)[1888r,1888d:0) 0@1888r 1@1872r 2@1824r 3@1776r 4@1760r 5@1648r 6@1552r 7@1536r 8@1408r 9@1296r 10@1280r 11@1168r 12@1072r 13@1056r 14@944r 15@848r 16@832r 17@688r 18@240r 19@224r 20@96r 21@80r 22@64r 23@16r
%12 [32r,48r:0) 0@32r weight:0.000000e+00
%15 [128r,144r:0) 0@128r weight:0.000000e+00
%16 [160r,176r:0) 0@160r weight:0.000000e+00
%17 [192r,208r:0) 0@192r weight:0.000000e+00
%25 [480r,512r:0) 0@480r weight:0.000000e+00
%26 [496r,512r:0) 0@496r weight:0.000000e+00
%27 [512r,528r:0) 0@512r weight:0.000000e+00
%28 [544r,560r:0) 0@544r weight:0.000000e+00
%31 [288r,720r:0) 0@288r weight:0.000000e+00
%34 [272r,752r:0) 0@272r weight:0.000000e+00
%36 [256r,784r:0) 0@256r weight:0.000000e+00
%38 [800r,816r:0) 0@800r weight:0.000000e+00
%40 [304r,976r:0) 0@304r weight:0.000000e+00
%43 [880r,1008r:0) 0@880r weight:0.000000e+00
%45 [912r,992r:0) 0@912r weight:0.000000e+00
%47 [1024r,1040r:0) 0@1024r weight:0.000000e+00
%49 [320r,1200r:0) 0@320r weight:0.000000e+00
%52 [1104r,1232r:0) 0@1104r weight:0.000000e+00
%54 [1136r,1216r:0) 0@1136r weight:0.000000e+00
%56 [1248r,1264r:0) 0@1248r weight:0.000000e+00
%60 [1376r,1488r:0) 0@1376r weight:0.000000e+00
%62 [1312r,1472r:0) 0@1312r weight:0.000000e+00
%64 [1504r,1520r:0) 0@1504r weight:0.000000e+00
%66 [432r,464B:1)[560r,624B:2)[624B,1680r:0) 0@624B-phi 1@432r 2@560r weight:0.000000e+00
%69 [1584r,1712r:0) 0@1584r weight:0.000000e+00
%71 [1616r,1696r:0) 0@1616r weight:0.000000e+00
%73 [1728r,1744r:0) 0@1728r weight:0.000000e+00
%75 [1840r,1856r:0) 0@1840r weight:0.000000e+00
%78 [400r,464B:0)[528r,624B:1)[624B,1440r:2) 0@400r 1@528r 2@624B-phi weight:0.000000e+00
RegMasks: 64r 224r 832r 1056r 1280r 1536r 1760r 1872r
********** MACHINEINSTRS **********
# Machine code for function main: NoPHIs, TracksLiveness, TiedOpsRewritten
Frame Objects:
fi#0: size=4, align=4, at location [SP]
fi#1: size=4, align=4, at location [SP]
0B bb.0.entry:
successors: %bb.1(0x30000000), %bb.2(0x50000000); %bb.1(37.50%), %bb.2(62.50%)
16B ADJCALLSTACKDOWN 0, 0, implicit-def dead $sp, implicit $sp
32B %12:gpr64common = MOVaddr target-flags(aarch64-page) @.str, target-flags(aarch64-pageoff, aarch64-nc) @.str
48B $x0 = COPY %12:gpr64common
64B BL @printf, <regmask $fp $lr $wzr $xzr $b8 $b9 $b10 $b11 $b12 $b13 $b14 $b15 $d8 $d9 $d10 $d11 $d12 $d13 $d14 $d15 $h8 $h9 $h10 $h11 $h12 $h13 $h14 $h15 $s8 $s9 $s10 $s11 $s12 and 55 more...>, implicit-def dead $lr, implicit $sp, implicit killed $x0, implicit-def $sp, implicit-def dead $w0
80B ADJCALLSTACKUP 0, 0, implicit-def dead $sp, implicit $sp
96B ADJCALLSTACKDOWN 16, 0, implicit-def dead $sp, implicit $sp
128B %15:gpr64common = ADDXri %stack.1.b, 0, 0
144B STRXui %15:gpr64common, $sp, 1 :: (store (s64) into stack + 8)
160B %16:gpr64common = ADDXri %stack.0.a, 0, 0
176B STRXui %16:gpr64common, $sp, 0 :: (store (s64) into stack)
192B %17:gpr64common = MOVaddr target-flags(aarch64-page) @.str.1, target-flags(aarch64-pageoff, aarch64-nc) @.str.1
208B $x0 = COPY %17:gpr64common
224B BL @scanf, <regmask $fp $lr $wzr $xzr $b8 $b9 $b10 $b11 $b12 $b13 $b14 $b15 $d8 $d9 $d10 $d11 $d12 $d13 $d14 $d15 $h8 $h9 $h10 $h11 $h12 $h13 $h14 $h15 $s8 $s9 $s10 $s11 $s12 and 55 more...>, implicit-def dead $lr, implicit $sp, implicit killed $x0, implicit-def $sp, implicit-def dead $w0
240B ADJCALLSTACKUP 16, 0, implicit-def dead $sp, implicit $sp
256B undef %36.sub_32:gpr64 = LDRWui %stack.0.a, 0 :: (dereferenceable load (s32) from %ir.a, !tbaa !6)
272B undef %34.sub_32:gpr64 = LDRWui %stack.1.b, 0 :: (dereferenceable load (s32) from %ir.b, !tbaa !6)
288B undef %31.sub_32:gpr64 = nsw ADDWrr %34.sub_32:gpr64, %36.sub_32:gpr64
304B undef %40.sub_32:gpr64 = nsw SUBWrr %36.sub_32:gpr64, %34.sub_32:gpr64
320B undef %49.sub_32:gpr64 = nsw MADDWrrr %34.sub_32:gpr64, %36.sub_32:gpr64, $wzr
336B CBNZW %34.sub_32:gpr64, %bb.2
352B bb.1:
; predecessors: %bb.0
successors: %bb.3(0x80000000); %bb.3(100.00%)
400B %78:fpr64 = FMOVD0
432B undef %66.sub_32:gpr64 = COPY $wzr
448B B %bb.3
464B bb.2.cond.true5:
; predecessors: %bb.0
successors: %bb.3(0x80000000); %bb.3(100.00%)
480B %25:fpr32 = nofpexcept SCVTFUWSri %36.sub_32:gpr64, implicit $fpcr
496B %26:fpr32 = nofpexcept SCVTFUWSri %34.sub_32:gpr64, implicit $fpcr
512B %27:fpr32 = nofpexcept FDIVSrr %25:fpr32, %26:fpr32, implicit $fpcr
528B %78:fpr64 = nofpexcept FCVTDSr %27:fpr32, implicit $fpcr
544B %28:gpr32 = SDIVWr %36.sub_32:gpr64, %34.sub_32:gpr64
560B undef %66.sub_32:gpr64 = MSUBWrrr %28:gpr32, %34.sub_32:gpr64, %36.sub_32:gpr64
624B bb.3.cond.end7:
; predecessors: %bb.2, %bb.1
successors: %bb.5(0x30000000), %bb.4(0x50000000); %bb.5(37.50%), %bb.4(62.50%)
688B ADJCALLSTACKDOWN 24, 0, implicit-def dead $sp, implicit $sp
720B STRXui %31:gpr64, $sp, 2 :: (store (s64) into stack + 16)
752B STRXui %34:gpr64, $sp, 1 :: (store (s64) into stack + 8)
784B STRXui %36:gpr64, $sp, 0 :: (store (s64) into stack)
800B %38:gpr64common = MOVaddr target-flags(aarch64-page) @.str.2, target-flags(aarch64-pageoff, aarch64-nc) @.str.2
816B $x0 = COPY %38:gpr64common
832B BL @printf, <regmask $fp $lr $wzr $xzr $b8 $b9 $b10 $b11 $b12 $b13 $b14 $b15 $d8 $d9 $d10 $d11 $d12 $d13 $d14 $d15 $h8 $h9 $h10 $h11 $h12 $h13 $h14 $h15 $s8 $s9 $s10 $s11 $s12 and 55 more...>, implicit-def dead $lr, implicit $sp, implicit killed $x0, implicit-def $sp, implicit-def dead $w0
848B ADJCALLSTACKUP 24, 0, implicit-def dead $sp, implicit $sp
880B undef %43.sub_32:gpr64 = LDRWui %stack.0.a, 0 :: (dereferenceable load (s32) from %ir.a, !tbaa !6)
912B undef %45.sub_32:gpr64 = LDRWui %stack.1.b, 0 :: (dereferenceable load (s32) from %ir.b, !tbaa !6)
944B ADJCALLSTACKDOWN 24, 0, implicit-def dead $sp, implicit $sp
976B STRXui %40:gpr64, $sp, 2 :: (store (s64) into stack + 16)
992B STRXui %45:gpr64, $sp, 1 :: (store (s64) into stack + 8)
1008B STRXui %43:gpr64, $sp, 0 :: (store (s64) into stack)
1024B %47:gpr64common = MOVaddr target-flags(aarch64-page) @.str.3, target-flags(aarch64-pageoff, aarch64-nc) @.str.3
1040B $x0 = COPY %47:gpr64common
1056B BL @printf, <regmask $fp $lr $wzr $xzr $b8 $b9 $b10 $b11 $b12 $b13 $b14 $b15 $d8 $d9 $d10 $d11 $d12 $d13 $d14 $d15 $h8 $h9 $h10 $h11 $h12 $h13 $h14 $h15 $s8 $s9 $s10 $s11 $s12 and 55 more...>, implicit-def dead $lr, implicit $sp, implicit killed $x0, implicit-def $sp, implicit-def dead $w0
1072B ADJCALLSTACKUP 24, 0, implicit-def dead $sp, implicit $sp
1104B undef %52.sub_32:gpr64 = LDRWui %stack.0.a, 0 :: (dereferenceable load (s32) from %ir.a, !tbaa !6)
1136B undef %54.sub_32:gpr64 = LDRWui %stack.1.b, 0 :: (dereferenceable load (s32) from %ir.b, !tbaa !6)
1168B ADJCALLSTACKDOWN 24, 0, implicit-def dead $sp, implicit $sp
1200B STRXui %49:gpr64, $sp, 2 :: (store (s64) into stack + 16)
1216B STRXui %54:gpr64, $sp, 1 :: (store (s64) into stack + 8)
1232B STRXui %52:gpr64, $sp, 0 :: (store (s64) into stack)
1248B %56:gpr64common = MOVaddr target-flags(aarch64-page) @.str.4, target-flags(aarch64-pageoff, aarch64-nc) @.str.4
1264B $x0 = COPY %56:gpr64common
1280B BL @printf, <regmask $fp $lr $wzr $xzr $b8 $b9 $b10 $b11 $b12 $b13 $b14 $b15 $d8 $d9 $d10 $d11 $d12 $d13 $d14 $d15 $h8 $h9 $h10 $h11 $h12 $h13 $h14 $h15 $s8 $s9 $s10 $s11 $s12 and 55 more...>, implicit-def dead $lr, implicit $sp, implicit killed $x0, implicit-def $sp, implicit-def dead $w0
1296B ADJCALLSTACKUP 24, 0, implicit-def dead $sp, implicit $sp
1312B undef %62.sub_32:gpr64 = LDRWui %stack.1.b, 0 :: (dereferenceable load (s32) from %ir.b, !tbaa !6)
1328B CBZW %62.sub_32:gpr64, %bb.5
1344B B %bb.4
1360B bb.4.if.then:
; predecessors: %bb.3
successors: %bb.6(0x80000000); %bb.6(100.00%)
1376B undef %60.sub_32:gpr64 = LDRWui %stack.0.a, 0 :: (dereferenceable load (s32) from %ir.a, !tbaa !6)
1408B ADJCALLSTACKDOWN 24, 0, implicit-def dead $sp, implicit $sp
1440B STRDui %78:fpr64, $sp, 2 :: (store (s64) into stack + 16)
1472B STRXui %62:gpr64, $sp, 1 :: (store (s64) into stack + 8)
1488B STRXui %60:gpr64, $sp, 0 :: (store (s64) into stack)
1504B %64:gpr64common = MOVaddr target-flags(aarch64-page) @.str.5, target-flags(aarch64-pageoff, aarch64-nc) @.str.5
1520B $x0 = COPY %64:gpr64common
1536B BL @printf, <regmask $fp $lr $wzr $xzr $b8 $b9 $b10 $b11 $b12 $b13 $b14 $b15 $d8 $d9 $d10 $d11 $d12 $d13 $d14 $d15 $h8 $h9 $h10 $h11 $h12 $h13 $h14 $h15 $s8 $s9 $s10 $s11 $s12 and 55 more...>, implicit-def dead $lr, implicit $sp, implicit killed $x0, implicit-def $sp, implicit-def dead $w0
1552B ADJCALLSTACKUP 24, 0, implicit-def dead $sp, implicit $sp
1584B undef %69.sub_32:gpr64 = LDRWui %stack.0.a, 0 :: (dereferenceable load (s32) from %ir.a, !tbaa !6)
1616B undef %71.sub_32:gpr64 = LDRWui %stack.1.b, 0 :: (dereferenceable load (s32) from %ir.b, !tbaa !6)
1648B ADJCALLSTACKDOWN 24, 0, implicit-def dead $sp, implicit $sp
1680B STRXui %66:gpr64, $sp, 2 :: (store (s64) into stack + 16)
1696B STRXui %71:gpr64, $sp, 1 :: (store (s64) into stack + 8)
1712B STRXui %69:gpr64, $sp, 0 :: (store (s64) into stack)
1728B %73:gpr64common = MOVaddr target-flags(aarch64-page) @.str.6, target-flags(aarch64-pageoff, aarch64-nc) @.str.6
1744B $x0 = COPY %73:gpr64common
1760B BL @printf, <regmask $fp $lr $wzr $xzr $b8 $b9 $b10 $b11 $b12 $b13 $b14 $b15 $d8 $d9 $d10 $d11 $d12 $d13 $d14 $d15 $h8 $h9 $h10 $h11 $h12 $h13 $h14 $h15 $s8 $s9 $s10 $s11 $s12 and 55 more...>, implicit-def dead $lr, implicit $sp, implicit killed $x0, implicit-def $sp, implicit-def dead $w0
1776B ADJCALLSTACKUP 24, 0, implicit-def dead $sp, implicit $sp
1792B B %bb.6
1808B bb.5.if.else:
; predecessors: %bb.3
successors: %bb.6(0x80000000); %bb.6(100.00%)
1824B ADJCALLSTACKDOWN 0, 0, implicit-def dead $sp, implicit $sp
1840B %75:gpr64common = MOVaddr target-flags(aarch64-page) @str, target-flags(aarch64-pageoff, aarch64-nc) @str
1856B $x0 = COPY %75:gpr64common
1872B BL @puts, <regmask $fp $lr $wzr $xzr $b8 $b9 $b10 $b11 $b12 $b13 $b14 $b15 $d8 $d9 $d10 $d11 $d12 $d13 $d14 $d15 $h8 $h9 $h10 $h11 $h12 $h13 $h14 $h15 $s8 $s9 $s10 $s11 $s12 and 55 more...>, implicit-def dead $lr, implicit $sp, implicit killed $x0, implicit-def $sp, implicit-def dead $w0
1888B ADJCALLSTACKUP 0, 0, implicit-def dead $sp, implicit $sp
1904B bb.6.if.end:
; predecessors: %bb.4, %bb.5
1936B $w0 = COPY $wzr
1952B RET_ReallyLR implicit killed $w0
# End machine code for function main.
========= After MSched
********** INTERVALS **********
WSP [16r,16d:23)[64r,64d:22)[80r,80d:21)[96r,96d:20)[224r,224d:19)[240r,240d:18)[688r,688d:17)[832r,832d:16)[848r,848d:15)[944r,944d:14)[1056r,1056d:13)[1072r,1072d:12)[1168r,1168d:11)[1280r,1280d:10)[1296r,1296d:9)[1408r,1408d:8)[1536r,1536d:7)[1552r,1552d:6)[1648r,1648d:5)[1760r,1760d:4)[1776r,1776d:3)[1824r,1824d:2)[1872r,1872d:1)[1888r,1888d:0) 0@1888r 1@1872r 2@1824r 3@1776r 4@1760r 5@1648r 6@1552r 7@1536r 8@1408r 9@1296r 10@1280r 11@1168r 12@1072r 13@1056r 14@944r 15@848r 16@832r 17@688r 18@240r 19@224r 20@96r 21@80r 22@64r 23@16r
%12 [32r,48r:0) 0@32r weight:0.000000e+00
%15 [128r,168r:0) 0@128r weight:0.000000e+00
%16 [160r,176r:0) 0@160r weight:0.000000e+00
%17 [192r,208r:0) 0@192r weight:0.000000e+00
%25 [480r,512r:0) 0@480r weight:0.000000e+00
%26 [496r,512r:0) 0@496r weight:0.000000e+00
%27 [512r,528r:0) 0@512r weight:0.000000e+00
%28 [544r,560r:0) 0@544r weight:0.000000e+00
%31 [288r,720r:0) 0@288r weight:0.000000e+00
%34 [272r,752r:0) 0@272r weight:0.000000e+00
%36 [256r,784r:0) 0@256r weight:0.000000e+00
%38 [800r,816r:0) 0@800r weight:0.000000e+00
%40 [304r,976r:0) 0@304r weight:0.000000e+00
%43 [880r,1008r:0) 0@880r weight:0.000000e+00
%45 [912r,992r:0) 0@912r weight:0.000000e+00
%47 [1024r,1040r:0) 0@1024r weight:0.000000e+00
%49 [320r,1200r:0) 0@320r weight:0.000000e+00
%52 [1104r,1232r:0) 0@1104r weight:0.000000e+00
%54 [1136r,1216r:0) 0@1136r weight:0.000000e+00
%56 [1248r,1264r:0) 0@1248r weight:0.000000e+00
%60 [1376r,1488r:0) 0@1376r weight:0.000000e+00
%62 [1312r,1472r:0) 0@1312r weight:0.000000e+00
%64 [1504r,1520r:0) 0@1504r weight:0.000000e+00
%66 [360r,464B:1)[560r,624B:2)[624B,1680r:0) 0@624B-phi 1@360r 2@560r weight:0.000000e+00
%69 [1584r,1712r:0) 0@1584r weight:0.000000e+00
%71 [1616r,1696r:0) 0@1616r weight:0.000000e+00
%73 [1728r,1744r:0) 0@1728r weight:0.000000e+00
%75 [1840r,1856r:0) 0@1840r weight:0.000000e+00
%78 [400r,464B:0)[528r,624B:1)[624B,1440r:2) 0@400r 1@528r 2@624B-phi weight:0.000000e+00
RegMasks: 64r 224r 832r 1056r 1280r 1536r 1760r 1872r
********** MACHINEINSTRS **********
# Machine code for function main: NoPHIs, TracksLiveness, TiedOpsRewritten
Frame Objects:
fi#0: size=4, align=4, at location [SP]
fi#1: size=4, align=4, at location [SP]
0B bb.0.entry:
successors: %bb.1(0x30000000), %bb.2(0x50000000); %bb.1(37.50%), %bb.2(62.50%)
16B ADJCALLSTACKDOWN 0, 0, implicit-def dead $sp, implicit $sp
32B %12:gpr64common = MOVaddr target-flags(aarch64-page) @.str, target-flags(aarch64-pageoff, aarch64-nc) @.str
48B $x0 = COPY %12:gpr64common
64B BL @printf, <regmask $fp $lr $wzr $xzr $b8 $b9 $b10 $b11 $b12 $b13 $b14 $b15 $d8 $d9 $d10 $d11 $d12 $d13 $d14 $d15 $h8 $h9 $h10 $h11 $h12 $h13 $h14 $h15 $s8 $s9 $s10 $s11 $s12 and 55 more...>, implicit-def dead $lr, implicit $sp, implicit killed $x0, implicit-def $sp, implicit-def dead $w0
80B ADJCALLSTACKUP 0, 0, implicit-def dead $sp, implicit $sp
96B ADJCALLSTACKDOWN 16, 0, implicit-def dead $sp, implicit $sp
128B %15:gpr64common = ADDXri %stack.1.b, 0, 0
160B %16:gpr64common = ADDXri %stack.0.a, 0, 0
168B STRXui %15:gpr64common, $sp, 1 :: (store (s64) into stack + 8)
176B STRXui %16:gpr64common, $sp, 0 :: (store (s64) into stack)
192B %17:gpr64common = MOVaddr target-flags(aarch64-page) @.str.1, target-flags(aarch64-pageoff, aarch64-nc) @.str.1
208B $x0 = COPY %17:gpr64common
224B BL @scanf, <regmask $fp $lr $wzr $xzr $b8 $b9 $b10 $b11 $b12 $b13 $b14 $b15 $d8 $d9 $d10 $d11 $d12 $d13 $d14 $d15 $h8 $h9 $h10 $h11 $h12 $h13 $h14 $h15 $s8 $s9 $s10 $s11 $s12 and 55 more...>, implicit-def dead $lr, implicit $sp, implicit killed $x0, implicit-def $sp, implicit-def dead $w0
240B ADJCALLSTACKUP 16, 0, implicit-def dead $sp, implicit $sp
256B undef %36.sub_32:gpr64 = LDRWui %stack.0.a, 0 :: (dereferenceable load (s32) from %ir.a, !tbaa !6)
272B undef %34.sub_32:gpr64 = LDRWui %stack.1.b, 0 :: (dereferenceable load (s32) from %ir.b, !tbaa !6)
288B undef %31.sub_32:gpr64 = nsw ADDWrr %34.sub_32:gpr64, %36.sub_32:gpr64
304B undef %40.sub_32:gpr64 = nsw SUBWrr %36.sub_32:gpr64, %34.sub_32:gpr64
320B undef %49.sub_32:gpr64 = nsw MADDWrrr %34.sub_32:gpr64, %36.sub_32:gpr64, $wzr
336B CBNZW %34.sub_32:gpr64, %bb.2
352B bb.1:
; predecessors: %bb.0
successors: %bb.3(0x80000000); %bb.3(100.00%)
360B undef %66.sub_32:gpr64 = COPY $wzr
400B %78:fpr64 = FMOVD0
448B B %bb.3
464B bb.2.cond.true5:
; predecessors: %bb.0
successors: %bb.3(0x80000000); %bb.3(100.00%)
480B %25:fpr32 = nofpexcept SCVTFUWSri %36.sub_32:gpr64, implicit $fpcr
496B %26:fpr32 = nofpexcept SCVTFUWSri %34.sub_32:gpr64, implicit $fpcr
512B %27:fpr32 = nofpexcept FDIVSrr %25:fpr32, %26:fpr32, implicit $fpcr
528B %78:fpr64 = nofpexcept FCVTDSr %27:fpr32, implicit $fpcr
544B %28:gpr32 = SDIVWr %36.sub_32:gpr64, %34.sub_32:gpr64
560B undef %66.sub_32:gpr64 = MSUBWrrr %28:gpr32, %34.sub_32:gpr64, %36.sub_32:gpr64
624B bb.3.cond.end7:
; predecessors: %bb.2, %bb.1
successors: %bb.5(0x30000000), %bb.4(0x50000000); %bb.5(37.50%), %bb.4(62.50%)
688B ADJCALLSTACKDOWN 24, 0, implicit-def dead $sp, implicit $sp
720B STRXui %31:gpr64, $sp, 2 :: (store (s64) into stack + 16)
752B STRXui %34:gpr64, $sp, 1 :: (store (s64) into stack + 8)
784B STRXui %36:gpr64, $sp, 0 :: (store (s64) into stack)
800B %38:gpr64common = MOVaddr target-flags(aarch64-page) @.str.2, target-flags(aarch64-pageoff, aarch64-nc) @.str.2
816B $x0 = COPY %38:gpr64common
832B BL @printf, <regmask $fp $lr $wzr $xzr $b8 $b9 $b10 $b11 $b12 $b13 $b14 $b15 $d8 $d9 $d10 $d11 $d12 $d13 $d14 $d15 $h8 $h9 $h10 $h11 $h12 $h13 $h14 $h15 $s8 $s9 $s10 $s11 $s12 and 55 more...>, implicit-def dead $lr, implicit $sp, implicit killed $x0, implicit-def $sp, implicit-def dead $w0
848B ADJCALLSTACKUP 24, 0, implicit-def dead $sp, implicit $sp
880B undef %43.sub_32:gpr64 = LDRWui %stack.0.a, 0 :: (dereferenceable load (s32) from %ir.a, !tbaa !6)
912B undef %45.sub_32:gpr64 = LDRWui %stack.1.b, 0 :: (dereferenceable load (s32) from %ir.b, !tbaa !6)
944B ADJCALLSTACKDOWN 24, 0, implicit-def dead $sp, implicit $sp
976B STRXui %40:gpr64, $sp, 2 :: (store (s64) into stack + 16)
992B STRXui %45:gpr64, $sp, 1 :: (store (s64) into stack + 8)
1008B STRXui %43:gpr64, $sp, 0 :: (store (s64) into stack)
1024B %47:gpr64common = MOVaddr target-flags(aarch64-page) @.str.3, target-flags(aarch64-pageoff, aarch64-nc) @.str.3
1040B $x0 = COPY %47:gpr64common
1056B BL @printf, <regmask $fp $lr $wzr $xzr $b8 $b9 $b10 $b11 $b12 $b13 $b14 $b15 $d8 $d9 $d10 $d11 $d12 $d13 $d14 $d15 $h8 $h9 $h10 $h11 $h12 $h13 $h14 $h15 $s8 $s9 $s10 $s11 $s12 and 55 more...>, implicit-def dead $lr, implicit $sp, implicit killed $x0, implicit-def $sp, implicit-def dead $w0
1072B ADJCALLSTACKUP 24, 0, implicit-def dead $sp, implicit $sp
1104B undef %52.sub_32:gpr64 = LDRWui %stack.0.a, 0 :: (dereferenceable load (s32) from %ir.a, !tbaa !6)
1136B undef %54.sub_32:gpr64 = LDRWui %stack.1.b, 0 :: (dereferenceable load (s32) from %ir.b, !tbaa !6)
1168B ADJCALLSTACKDOWN 24, 0, implicit-def dead $sp, implicit $sp
1200B STRXui %49:gpr64, $sp, 2 :: (store (s64) into stack + 16)
1216B STRXui %54:gpr64, $sp, 1 :: (store (s64) into stack + 8)
1232B STRXui %52:gpr64, $sp, 0 :: (store (s64) into stack)
1248B %56:gpr64common = MOVaddr target-flags(aarch64-page) @.str.4, target-flags(aarch64-pageoff, aarch64-nc) @.str.4
1264B $x0 = COPY %56:gpr64common
1280B BL @printf, <regmask $fp $lr $wzr $xzr $b8 $b9 $b10 $b11 $b12 $b13 $b14 $b15 $d8 $d9 $d10 $d11 $d12 $d13 $d14 $d15 $h8 $h9 $h10 $h11 $h12 $h13 $h14 $h15 $s8 $s9 $s10 $s11 $s12 and 55 more...>, implicit-def dead $lr, implicit $sp, implicit killed $x0, implicit-def $sp, implicit-def dead $w0
1296B ADJCALLSTACKUP 24, 0, implicit-def dead $sp, implicit $sp
1312B undef %62.sub_32:gpr64 = LDRWui %stack.1.b, 0 :: (dereferenceable load (s32) from %ir.b, !tbaa !6)
1328B CBZW %62.sub_32:gpr64, %bb.5
1344B B %bb.4
1360B bb.4.if.then:
; predecessors: %bb.3
successors: %bb.6(0x80000000); %bb.6(100.00%)
1376B undef %60.sub_32:gpr64 = LDRWui %stack.0.a, 0 :: (dereferenceable load (s32) from %ir.a, !tbaa !6)
1408B ADJCALLSTACKDOWN 24, 0, implicit-def dead $sp, implicit $sp
1440B STRDui %78:fpr64, $sp, 2 :: (store (s64) into stack + 16)
1472B STRXui %62:gpr64, $sp, 1 :: (store (s64) into stack + 8)
1488B STRXui %60:gpr64, $sp, 0 :: (store (s64) into stack)
1504B %64:gpr64common = MOVaddr target-flags
ScheduleDAGMI (Post RA)
=========Before post-MI-sched:
# Machine code for function main: NoPHIs, TracksLiveness, NoVRegs, TiedOpsRewritten, TracksDebugUserValues
Frame Objects:
fi#0: size=4, align=4, at location [SP-68]
fi#1: size=4, align=4, at location [SP-72]
fi#2: size=8, align=8, at location [SP-8]
fi#3: size=8, align=8, at location [SP-16]
fi#4: size=8, align=8, at location [SP-24]
fi#5: size=8, align=8, at location [SP-32]
fi#6: size=8, align=8, at location [SP-40]
fi#7: size=8, align=8, at location [SP-48]
fi#8: size=8, align=8, at location [SP-56]
fi#9: size=8, align=8, at location [SP-64]
bb.0.entry:
successors: %bb.1(0x30000000), %bb.2(0x50000000); %bb.1(37.50%), %bb.2(62.50%)
liveins: $d8, $d9, $x21, $x22, $x19, $x20, $lr
$sp = frame-setup SUBXri $sp, 96, 0
frame-setup STPDi killed $d9, killed $d8, $sp, 4 :: (store (s64) into %stack.9), (store (s64) into %stack.8)
frame-setup STPXi killed $x22, killed $x21, $sp, 6 :: (store (s64) into %stack.7), (store (s64) into %stack.6)
frame-setup STPXi killed $x20, killed $x19, $sp, 8 :: (store (s64) into %stack.5), (store (s64) into %stack.4)
frame-setup STPXi killed $fp, killed $lr, $sp, 10 :: (store (s64) into %stack.3), (store (s64) into %stack.2)
$fp = frame-setup ADDXri $sp, 80, 0
frame-setup CFI_INSTRUCTION def_cfa $w29, 16
frame-setup CFI_INSTRUCTION offset $w30, -8
frame-setup CFI_INSTRUCTION offset $w29, -16
frame-setup CFI_INSTRUCTION offset $w19, -24
frame-setup CFI_INSTRUCTION offset $w20, -32
frame-setup CFI_INSTRUCTION offset $w21, -40
frame-setup CFI_INSTRUCTION offset $w22, -48
frame-setup CFI_INSTRUCTION offset $b8, -56
frame-setup CFI_INSTRUCTION offset $b9, -64
$x0 = ADRP target-flags(aarch64-page) @.str
renamable $x0 = ADDXri $x0, target-flags(aarch64-pageoff, aarch64-nc) @.str, 0
BL @printf, <regmask $fp $lr $wzr $xzr $b8 $b9 $b10 $b11 $b12 $b13 $b14 $b15 $d8 $d9 $d10 $d11 $d12 $d13 $d14 $d15 $h8 $h9 $h10 $h11 $h12 $h13 $h14 $h15 $s8 $s9 $s10 $s11 $s12 and 55 more...>, implicit-def dead $lr, implicit $sp, implicit $x0, implicit-def $sp, implicit-def dead $w0
$x8 = ADDXri $sp, 24, 0
$x9 = ADDXri $sp, 28, 0
STPXi killed renamable $x9, killed renamable $x8, $sp, 0 :: (store (s64) into stack + 8), (store (s64) into stack)
$x0 = ADRP target-flags(aarch64-page) @.str.1
renamable $x0 = ADDXri $x0, target-flags(aarch64-pageoff, aarch64-nc) @.str.1, 0
BL @scanf, <regmask $fp $lr $wzr $xzr $b8 $b9 $b10 $b11 $b12 $b13 $b14 $b15 $d8 $d9 $d10 $d11 $d12 $d13 $d14 $d15 $h8 $h9 $h10 $h11 $h12 $h13 $h14 $h15 $s8 $s9 $s10 $s11 $s12 and 55 more...>, implicit-def dead $lr, implicit $sp, implicit $x0, implicit-def $sp, implicit-def dead $w0
renamable $w9, renamable $w8 = LDPWi $sp, 6 :: (dereferenceable load (s32) from %ir.a, !tbaa !6), (dereferenceable load (s32) from %ir.b, !tbaa !6)
$w10 = ADDWrs renamable $w9, renamable $w8, 0, implicit-def $x10
$w21 = SUBWrs renamable $w8, renamable $w9, 0, implicit-def $x21
renamable $w20 = nsw MADDWrrr renamable $w9, renamable $w8, $wzr, implicit-def $x20
CBNZW renamable $w9, %bb.2
bb.1:
; predecessors: %bb.0
successors: %bb.3(0x80000000); %bb.3(100.00%)
liveins: $x8, $x9, $x10, $x20, $x21
$w19 = MOVZWi 0, 0, implicit-def $x19
renamable $d8 = FMOVD0
B %bb.3
bb.2.cond.true5:
; predecessors: %bb.0
successors: %bb.3(0x80000000); %bb.3(100.00%)
liveins: $x8, $x9, $x10, $x20, $x21
renamable $s0 = nofpexcept SCVTFUWSri renamable $w8, implicit $fpcr
renamable $s1 = nofpexcept SCVTFUWSri renamable $w9, implicit $fpcr
renamable $s0 = nofpexcept FDIVSrr killed renamable $s0, killed renamable $s1, implicit $fpcr
renamable $d8 = nofpexcept FCVTDSr killed renamable $s0, implicit $fpcr
renamable $w11 = SDIVWr renamable $w8, renamable $w9
renamable $w19 = MSUBWrrr killed renamable $w11, renamable $w9, renamable $w8, implicit-def $x19
bb.3.cond.end7:
; predecessors: %bb.2, %bb.1
successors: %bb.5(0x30000000), %bb.4(0x50000000); %bb.5(37.50%), %bb.4(62.50%)
liveins: $d8, $x8, $x9, $x10, $x19, $x20, $x21
STPXi killed renamable $x9, killed renamable $x10, $sp, 1 :: (store (s64) into stack + 16), (store (s64) into stack + 8)
STRXui killed renamable $x8, $sp, 0 :: (store (s64) into stack)
$x0 = ADRP target-flags(aarch64-page) @.str.2
renamable $x0 = ADDXri $x0, target-flags(aarch64-pageoff, aarch64-nc) @.str.2, 0
BL @printf, <regmask $fp $lr $wzr $xzr $b8 $b9 $b10 $b11 $b12 $b13 $b14 $b15 $d8 $d9 $d10 $d11 $d12 $d13 $d14 $d15 $h8 $h9 $h10 $h11 $h12 $h13 $h14 $h15 $s8 $s9 $s10 $s11 $s12 and 55 more...>, implicit-def dead $lr, implicit $sp, implicit $x0, implicit-def $sp, implicit-def dead $w0
renamable $w9, renamable $w8 = LDPWi $sp, 6 :: (dereferenceable load (s32) from %ir.a, !tbaa !6), (dereferenceable load (s32) from %ir.b, !tbaa !6)
STPXi killed renamable $x9, killed renamable $x21, $sp, 1 :: (store (s64) into stack + 16), (store (s64) into stack + 8)
STRXui killed renamable $x8, $sp, 0 :: (store (s64) into stack)
$x0 = ADRP target-flags(aarch64-page) @.str.3
renamable $x0 = ADDXri $x0, target-flags(aarch64-pageoff, aarch64-nc) @.str.3, 0
BL @printf, <regmask $fp $lr $wzr $xzr $b8 $b9 $b10 $b11 $b12 $b13 $b14 $b15 $d8 $d9 $d10 $d11 $d12 $d13 $d14 $d15 $h8 $h9 $h10 $h11 $h12 $h13 $h14 $h15 $s8 $s9 $s10 $s11 $s12 and 55 more...>, implicit-def dead $lr, implicit $sp, implicit $x0, implicit-def $sp, implicit-def dead $w0
renamable $w9, renamable $w8 = LDPWi $sp, 6 :: (dereferenceable load (s32) from %ir.a, !tbaa !6), (dereferenceable load (s32) from %ir.b, !tbaa !6)
STPXi killed renamable $x9, killed renamable $x20, $sp, 1 :: (store (s64) into stack + 16), (store (s64) into stack + 8)
STRXui killed renamable $x8, $sp, 0 :: (store (s64) into stack)
$x0 = ADRP target-flags(aarch64-page) @.str.4
renamable $x0 = ADDXri $x0, target-flags(aarch64-pageoff, aarch64-nc) @.str.4, 0
BL @printf, <regmask $fp $lr $wzr $xzr $b8 $b9 $b10 $b11 $b12 $b13 $b14 $b15 $d8 $d9 $d10 $d11 $d12 $d13 $d14 $d15 $h8 $h9 $h10 $h11 $h12 $h13 $h14 $h15 $s8 $s9 $s10 $s11 $s12 and 55 more...>, implicit-def dead $lr, implicit $sp, implicit $x0, implicit-def $sp, implicit-def dead $w0
renamable $w8 = LDRWui $sp, 6, implicit-def $x8 :: (dereferenceable load (s32) from %ir.b, !tbaa !6)
CBZW renamable $w8, %bb.5
bb.4.if.then:
; predecessors: %bb.3
successors: %bb.6(0x80000000); %bb.6(100.00%)
liveins: $d8, $x8, $x19
renamable $w9 = LDRWui $sp, 7, implicit-def $x9 :: (dereferenceable load (s32) from %ir.a, !tbaa !6)
STRDui killed renamable $d8, $sp, 2 :: (store (s64) into stack + 16)
STPXi killed renamable $x9, killed renamable $x8, $sp, 0 :: (store (s64) into stack + 8), (store (s64) into stack)
$x0 = ADRP target-flags(aarch64-page) @.str.5
renamable $x0 = ADDXri $x0, target-flags(aarch64-pageoff, aarch64-nc) @.str.5, 0
BL @printf, <regmask $fp $lr $wzr $xzr $b8 $b9 $b10 $b11 $b12 $b13 $b14 $b15 $d8 $d9 $d10 $d11 $d12 $d13 $d14 $d15 $h8 $h9 $h10 $h11 $h12 $h13 $h14 $h15 $s8 $s9 $s10 $s11 $s12 and 55 more...>, implicit-def dead $lr, implicit $sp, implicit $x0, implicit-def $sp, implicit-def dead $w0
renamable $w9, renamable $w8 = LDPWi $sp, 6 :: (dereferenceable load (s32) from %ir.a, !tbaa !6), (dereferenceable load (s32) from %ir.b, !tbaa !6)
STPXi killed renamable $x9, killed renamable $x19, $sp, 1 :: (store (s64) into stack + 16), (store (s64) into stack + 8)
STRXui killed renamable $x8, $sp, 0 :: (store (s64) into stack)
$x0 = ADRP target-flags(aarch64-page) @.str.6
renamable $x0 = ADDXri $x0, target-flags(aarch64-pageoff, aarch64-nc) @.str.6, 0
BL @printf, <regmask $fp $lr $wzr $xzr $b8 $b9 $b10 $b11 $b12 $b13 $b14 $b15 $d8 $d9 $d10 $d11 $d12 $d13 $d14 $d15 $h8 $h9 $h10 $h11 $h12 $h13 $h14 $h15 $s8 $s9 $s10 $s11 $s12 and 55 more...>, implicit-def dead $lr, implicit $sp, implicit $x0, implicit-def $sp, implicit-def dead $w0
B %bb.6
bb.5.if.else:
; predecessors: %bb.3
successors: %bb.6(0x80000000); %bb.6(100.00%)
$x0 = ADRP target-flags(aarch64-page) @str
renamable $x0 = ADDXri $x0, target-flags(aarch64-pageoff, aarch64-nc) @str, 0
BL @puts, <regmask $fp $lr $wzr $xzr $b8 $b9 $b10 $b11 $b12 $b13 $b14 $b15 $d8 $d9 $d10 $d11 $d12 $d13 $d14 $d15 $h8 $h9 $h10 $h11 $h12 $h13 $h14 $h15 $s8 $s9 $s10 $s11 $s12 and 55 more...>, implicit-def dead $lr, implicit $sp, implicit $x0, implicit-def $sp, implicit-def dead $w0
bb.6.if.end:
; predecessors: %bb.4, %bb.5
$w0 = MOVZWi 0, 0
$fp, $lr = frame-destroy LDPXi $sp, 10 :: (load (s64) from %stack.3), (load (s64) from %stack.2)
$x20, $x19 = frame-destroy LDPXi $sp, 8 :: (load (s64) from %stack.5), (load (s64) from %stack.4)
$x22, $x21 = frame-destroy LDPXi $sp, 6 :: (load (s64) from %stack.7), (load (s64) from %stack.6)
$d9, $d8 = frame-destroy LDPDi $sp, 4 :: (load (s64) from %stack.9), (load (s64) from %stack.8)
$sp = frame-destroy ADDXri $sp, 96, 0
RET undef $lr, implicit $w0
# End machine code for function main.
========After post-MI-sched:
# Machine code for function main: NoPHIs, TracksLiveness, NoVRegs, TiedOpsRewritten, TracksDebugUserValues
Frame Objects:
fi#0: size=4, align=4, at location [SP-68]
fi#1: size=4, align=4, at location [SP-72]
fi#2: size=8, align=8, at location [SP-8]
fi#3: size=8, align=8, at location [SP-16]
fi#4: size=8, align=8, at location [SP-24]
fi#5: size=8, align=8, at location [SP-32]
fi#6: size=8, align=8, at location [SP-40]
fi#7: size=8, align=8, at location [SP-48]
fi#8: size=8, align=8, at location [SP-56]
fi#9: size=8, align=8, at location [SP-64]
bb.0.entry:
successors: %bb.1(0x30000000), %bb.2(0x50000000); %bb.1(37.50%), %bb.2(62.50%)
liveins: $d8, $d9, $x21, $x22, $x19, $x20, $lr
$sp = frame-setup SUBXri $sp, 96, 0
frame-setup STPDi killed $d9, killed $d8, $sp, 4 :: (store (s64) into %stack.9), (store (s64) into %stack.8)
frame-setup STPXi killed $x22, killed $x21, $sp, 6 :: (store (s64) into %stack.7), (store (s64) into %stack.6)
frame-setup STPXi killed $x20, killed $x19, $sp, 8 :: (store (s64) into %stack.5), (store (s64) into %stack.4)
frame-setup STPXi $fp, killed $lr, $sp, 10 :: (store (s64) into %stack.3), (store (s64) into %stack.2)
$fp = frame-setup ADDXri $sp, 80, 0
frame-setup CFI_INSTRUCTION def_cfa $w29, 16
frame-setup CFI_INSTRUCTION offset $w30, -8
frame-setup CFI_INSTRUCTION offset $w29, -16
frame-setup CFI_INSTRUCTION offset $w19, -24
frame-setup CFI_INSTRUCTION offset $w20, -32
frame-setup CFI_INSTRUCTION offset $w21, -40
frame-setup CFI_INSTRUCTION offset $w22, -48
frame-setup CFI_INSTRUCTION offset $b8, -56
frame-setup CFI_INSTRUCTION offset $b9, -64
$x0 = ADRP target-flags(aarch64-page) @.str
renamable $x0 = ADDXri killed $x0, target-flags(aarch64-pageoff, aarch64-nc) @.str, 0
BL @printf, <regmask $fp $lr $wzr $xzr $b8 $b9 $b10 $b11 $b12 $b13 $b14 $b15 $d8 $d9 $d10 $d11 $d12 $d13 $d14 $d15 $h8 $h9 $h10 $h11 $h12 $h13 $h14 $h15 $s8 $s9 $s10 $s11 $s12 and 55 more...>, implicit-def dead $lr, implicit $sp, implicit killed $x0, implicit-def $sp, implicit-def dead $w0
$x0 = ADRP target-flags(aarch64-page) @.str.1
$x8 = ADDXri $sp, 24, 0
$x9 = ADDXri $sp, 28, 0
renamable $x0 = ADDXri killed $x0, target-flags(aarch64-pageoff, aarch64-nc) @.str.1, 0
STPXi killed renamable $x9, killed renamable $x8, $sp, 0 :: (store (s64) into stack + 8), (store (s64) into stack)
BL @scanf, <regmask $fp $lr $wzr $xzr $b8 $b9 $b10 $b11 $b12 $b13 $b14 $b15 $d8 $d9 $d10 $d11 $d12 $d13 $d14 $d15 $h8 $h9 $h10 $h11 $h12 $h13 $h14 $h15 $s8 $s9 $s10 $s11 $s12 and 55 more...>, implicit-def dead $lr, implicit $sp, implicit killed $x0, implicit-def $sp, implicit-def dead $w0
renamable $w9, renamable $w8 = LDPWi $sp, 6 :: (dereferenceable load (s32) from %ir.a, !tbaa !6), (dereferenceable load (s32) from %ir.b, !tbaa !6)
renamable $w20 = nsw MADDWrrr renamable $w9, renamable $w8, $wzr, implicit-def $x20
$w10 = ADDWrs renamable $w9, renamable $w8, 0, implicit-def $x10
$w21 = SUBWrs renamable $w8, renamable $w9, 0, implicit-def $x21
CBNZW renamable $w9, %bb.2
bb.1:
; predecessors: %bb.0
successors: %bb.3(0x80000000); %bb.3(100.00%)
liveins: $x8, $x9, $x10, $x20, $x21
$w19 = MOVZWi 0, 0, implicit-def $x19
renamable $d8 = FMOVD0
B %bb.3
bb.2.cond.true5:
; predecessors: %bb.0
successors: %bb.3(0x80000000); %bb.3(100.00%)
liveins: $x8, $x9, $x10, $x20, $x21
renamable $s0 = nofpexcept SCVTFUWSri renamable $w8, implicit $fpcr
renamable $s1 = nofpexcept SCVTFUWSri renamable $w9, implicit $fpcr
renamable $w11 = SDIVWr renamable $w8, renamable $w9
renamable $s0 = nofpexcept FDIVSrr killed renamable $s0, killed renamable $s1, implicit $fpcr
renamable $w19 = MSUBWrrr killed renamable $w11, renamable $w9, renamable $w8, implicit-def $x19
renamable $d8 = nofpexcept FCVTDSr killed renamable $s0, implicit $fpcr
bb.3.cond.end7:
; predecessors: %bb.2, %bb.1
successors: %bb.5(0x30000000), %bb.4(0x50000000); %bb.5(37.50%), %bb.4(62.50%)
liveins: $d8, $x8, $x9, $x10, $x19, $x20, $x21
$x0 = ADRP target-flags(aarch64-page) @.str.2
STPXi killed renamable $x9, killed renamable $x10, $sp, 1 :: (store (s64) into stack + 16), (store (s64) into stack + 8)
STRXui killed renamable $x8, $sp, 0 :: (store (s64) into stack)
renamable $x0 = ADDXri killed $x0, target-flags(aarch64-pageoff, aarch64-nc) @.str.2, 0
BL @printf, <regmask $fp $lr $wzr $xzr $b8 $b9 $b10 $b11 $b12 $b13 $b14 $b15 $d8 $d9 $d10 $d11 $d12 $d13 $d14 $d15 $h8 $h9 $h10 $h11 $h12 $h13 $h14 $h15 $s8 $s9 $s10 $s11 $s12 and 55 more...>, implicit-def dead $lr, implicit $sp, implicit killed $x0, implicit-def $sp, implicit-def dead $w0
renamable $w9, renamable $w8 = LDPWi $sp, 6 :: (dereferenceable load (s32) from %ir.a, !tbaa !6), (dereferenceable load (s32) from %ir.b, !tbaa !6)
$x0 = ADRP target-flags(aarch64-page) @.str.3
renamable $x0 = ADDXri killed $x0, target-flags(aarch64-pageoff, aarch64-nc) @.str.3, 0
STPXi killed renamable $x9, killed renamable $x21, $sp, 1 :: (store (s64) into stack + 16), (store (s64) into stack + 8)
STRXui killed renamable $x8, $sp, 0 :: (store (s64) into stack)
BL @printf, <regmask $fp $lr $wzr $xzr $b8 $b9 $b10 $b11 $b12 $b13 $b14 $b15 $d8 $d9 $d10 $d11 $d12 $d13 $d14 $d15 $h8 $h9 $h10 $h11 $h12 $h13 $h14 $h15 $s8 $s9 $s10 $s11 $s12 and 55 more...>, implicit-def dead $lr, implicit $sp, implicit killed $x0, implicit-def $sp, implicit-def dead $w0
renamable $w9, renamable $w8 = LDPWi $sp, 6 :: (dereferenceable load (s32) from %ir.a, !tbaa !6), (dereferenceable load (s32) from %ir.b, !tbaa !6)
$x0 = ADRP target-flags(aarch64-page) @.str.4
renamable $x0 = ADDXri killed $x0, target-flags(aarch64-pageoff, aarch64-nc) @.str.4, 0
STPXi killed renamable $x9, killed renamable $x20, $sp, 1 :: (store (s64) into stack + 16), (store (s64) into stack + 8)
STRXui killed renamable $x8, $sp, 0 :: (store (s64) into stack)
BL @printf, <regmask $fp $lr $wzr $xzr $b8 $b9 $b10 $b11 $b12 $b13 $b14 $b15 $d8 $d9 $d10 $d11 $d12 $d13 $d14 $d15 $h8 $h9 $h10 $h11 $h12 $h13 $h14 $h15 $s8 $s9 $s10 $s11 $s12 and 55 more...>, implicit-def dead $lr, implicit $sp, implicit killed $x0, implicit-def $sp, implicit-def dead $w0
renamable $w8 = LDRWui $sp, 6, implicit-def $x8 :: (dereferenceable load (s32) from %ir.b, !tbaa !6)
CBZW renamable $w8, %bb.5
bb.4.if.then:
; predecessors: %bb.3
successors: %bb.6(0x80000000); %bb.6(100.00%)
liveins: $d8, $x8, $x19
renamable $w9 = LDRWui $sp, 7, implicit-def $x9 :: (dereferenceable load (s32) from %ir.a, !tbaa !6)
$x0 = ADRP target-flags(aarch64-page) @.str.5
STRDui killed renamable $d8, $sp, 2 :: (store (s64) into stack + 16)
renamable $x0 = ADDXri killed $x0, target-flags(aarch64-pageoff, aarch64-nc) @.str.5, 0
STPXi killed renamable $x9, killed renamable $x8, $sp, 0 :: (store (s64) into stack + 8), (store (s64) into stack)
BL @printf, <regmask $fp $lr $wzr $xzr $b8 $b9 $b10 $b11 $b12 $b13 $b14 $b15 $d8 $d9 $d10 $d11 $d12 $d13 $d14 $d15 $h8 $h9 $h10 $h11 $h12 $h13 $h14 $h15 $s8 $s9 $s10 $s11 $s12 and 55 more...>, implicit-def dead $lr, implicit $sp, implicit killed $x0, implicit-def $sp, implicit-def dead $w0
renamable $w9, renamable $w8 = LDPWi $sp, 6 :: (dereferenceable load (s32) from %ir.a, !tbaa !6), (dereferenceable load (s32) from %ir.b, !tbaa !6)
$x0 = ADRP target-flags(aarch64-page) @.str.6
renamable $x0 = ADDXri killed $x0, target-flags(aarch64-pageoff, aarch64-nc) @.str.6, 0
STPXi killed renamable $x9, killed renamable $x19, $sp, 1 :: (store (s64) into stack + 16), (store (s64) into stack + 8)
STRXui killed renamable $x8, $sp, 0 :: (store (s64) into stack)
BL @printf, <regmask $fp $lr $wzr $xzr $b8 $b9 $b10 $b11 $b12 $b13 $b14 $b15 $d8 $d9 $d10 $d11 $d12 $d13 $d14 $d15 $h8 $h9 $h10 $h11 $h12 $h13 $h14 $h15 $s8 $s9 $s10 $s11 $s12 and 55 more...>, implicit-def dead $lr, implicit $sp, implicit killed $x0, implicit-def $sp, implicit-def dead $w0
B %bb.6
bb.5.if.else:
; predecessors: %bb.3
successors: %bb.6(0x80000000); %bb.6(100.00%)
$x0 = ADRP target-flags(aarch64-page) @str
renamable $x0 = ADDXri killed $x0, target-flags(aarch64-pageoff, aarch64-nc) @str, 0
BL @puts, <regmask $fp $lr $wzr $xzr $b8 $b9 $b10 $b11 $b12 $b13 $b14 $b15 $d8 $d9 $d10 $d11 $d12 $d13 $d14 $d15 $h8 $h9 $h10 $h11 $h12 $h13 $h14 $h15 $s8 $s9 $s10 $s11 $s12 and 55 more...>, implicit-def dead $lr, implicit $sp, implicit killed $x0, implicit-def $sp, implicit-def dead $w0
bb.6.if.end:
; predecessors: %bb.4, %bb.5
$fp, $lr = frame-destroy LDPXi $sp, 10 :: (load (s64) from %stack.3), (load (s64) from %stack.2)
$x20, $x19 = frame-destroy LDPXi $sp, 8 :: (load (s64) from %stack.5), (load (s64) from %stack.4)
$w0 = MOVZWi 0, 0
$x22, $x21 = frame-destroy LDPXi $sp, 6 :: (load (s64) from %stack.7), (load (s64) from %stack.6)
$d9, $d8 = frame-destroy LDPDi $sp, 4 :: (load (s64) from %stack.9), (load (s64) from %stack.8)
$sp = frame-destroy ADDXri $sp, 96, 0
RET undef $lr, implicit killed $w0
# End machine code for function main.
PostRASchedulerList (SchedulePostRATDList)
目前暂时还没能打印出该阶段(PostRASchedulerList)的调度日志。
参考文献
同一维度内的访问相对基地址是固定跨步的;但如果在不同维度之间重新计算一次 base,就会失去静态数组的优势。
而通过 malloc 动态分配的数组,则可能需要重新计算 base 或 offset。
ld xxx, offset(sp) 是典型的寄存器溢出(spill)后从栈上重新加载的语句。
lw、ld 的 latency 一致。
小 kernel 中不能加入 call 调用。