diff options
author | Gerolf Hoflehner <ghoflehner@apple.com> | 2016-04-24 05:14:01 +0000 |
---|---|---|
committer | Gerolf Hoflehner <ghoflehner@apple.com> | 2016-04-24 05:14:01 +0000 |
commit | 01b3a6184aae0b5ef1e2cccd4bb8da8d6a0df68a (patch) | |
tree | 935cd91c560d5ade3b118bd4ede0ec519bc26601 /llvm/lib/CodeGen/MachineCombiner.cpp | |
parent | 9cf3bf659c7f2e447517115443bc1efb1693b541 (diff) | |
download | bcm5719-llvm-01b3a6184aae0b5ef1e2cccd4bb8da8d6a0df68a.tar.gz bcm5719-llvm-01b3a6184aae0b5ef1e2cccd4bb8da8d6a0df68a.zip |
[MachineCombiner] Support for floating-point FMA on ARM64 (re-commit r267098)
The original patch caused crashes because it could derefence a null pointer
for SelectionDAGTargetInfo for targets that do not define it.
Evaluates fmul+fadd -> fmadd combines and similar code sequences in the
machine combiner. It adds support for float and double similar to the existing
integer implementation. The key features are:
- DAGCombiner checks whether it should combine greedily or let the machine
combiner do the evaluation. This is only supported on ARM64.
- It gives preference to throughput over latency: the heuristic used is
to combine always in loops. The targets decides whether the machine
combiner should optimize for throughput or latency.
- Supports for fmadd, f(n)msub, fmla, fmls patterns
- On by default at O3 ffast-math
llvm-svn: 267328
Diffstat (limited to 'llvm/lib/CodeGen/MachineCombiner.cpp')
-rw-r--r-- | llvm/lib/CodeGen/MachineCombiner.cpp | 12 |
1 files changed, 11 insertions, 1 deletions
diff --git a/llvm/lib/CodeGen/MachineCombiner.cpp b/llvm/lib/CodeGen/MachineCombiner.cpp index 44601d5e462..6b5c6ba8250 100644 --- a/llvm/lib/CodeGen/MachineCombiner.cpp +++ b/llvm/lib/CodeGen/MachineCombiner.cpp @@ -40,6 +40,7 @@ class MachineCombiner : public MachineFunctionPass { const TargetRegisterInfo *TRI; MCSchedModel SchedModel; MachineRegisterInfo *MRI; + MachineLoopInfo *MLI; // Current MachineLoopInfo MachineTraceMetrics *Traces; MachineTraceMetrics::Ensemble *MinInstr; @@ -86,6 +87,7 @@ char &llvm::MachineCombinerID = MachineCombiner::ID; INITIALIZE_PASS_BEGIN(MachineCombiner, "machine-combiner", "Machine InstCombiner", false, false) +INITIALIZE_PASS_DEPENDENCY(MachineLoopInfo) INITIALIZE_PASS_DEPENDENCY(MachineTraceMetrics) INITIALIZE_PASS_END(MachineCombiner, "machine-combiner", "Machine InstCombiner", false, false) @@ -93,6 +95,7 @@ INITIALIZE_PASS_END(MachineCombiner, "machine-combiner", "Machine InstCombiner", void MachineCombiner::getAnalysisUsage(AnalysisUsage &AU) const { AU.setPreservesCFG(); AU.addPreserved<MachineDominatorTree>(); + AU.addRequired<MachineLoopInfo>(); AU.addPreserved<MachineLoopInfo>(); AU.addRequired<MachineTraceMetrics>(); AU.addPreserved<MachineTraceMetrics>(); @@ -354,6 +357,8 @@ bool MachineCombiner::combineInstructions(MachineBasicBlock *MBB) { DEBUG(dbgs() << "Combining MBB " << MBB->getName() << "\n"); auto BlockIter = MBB->begin(); + // Check if the block is in a loop. + const MachineLoop *ML = MLI->getLoopFor(MBB); while (BlockIter != MBB->end()) { auto &MI = *BlockIter++; @@ -406,11 +411,15 @@ bool MachineCombiner::combineInstructions(MachineBasicBlock *MBB) { if (!NewInstCount) continue; + bool SubstituteAlways = false; + if (ML && TII->isThroughputPattern(P)) + SubstituteAlways = true; + // Substitute when we optimize for codesize and the new sequence has // fewer instructions OR // the new sequence neither lengthens the critical path nor increases // resource pressure. - if (doSubstitute(NewInstCount, OldInstCount) || + if (SubstituteAlways || doSubstitute(NewInstCount, OldInstCount) || (improvesCriticalPathLen(MBB, &MI, BlockTrace, InsInstrs, InstrIdxForVirtReg, P) && preservesResourceLen(MBB, BlockTrace, InsInstrs, DelInstrs))) { @@ -447,6 +456,7 @@ bool MachineCombiner::runOnMachineFunction(MachineFunction &MF) { SchedModel = STI.getSchedModel(); TSchedModel.init(SchedModel, &STI, TII); MRI = &MF.getRegInfo(); + MLI = &getAnalysis<MachineLoopInfo>(); Traces = &getAnalysis<MachineTraceMetrics>(); MinInstr = nullptr; OptSize = MF.getFunction()->optForSize(); |