diff options
author | Gerolf Hoflehner <ghoflehner@apple.com> | 2016-04-22 02:15:19 +0000 |
---|---|---|
committer | Gerolf Hoflehner <ghoflehner@apple.com> | 2016-04-22 02:15:19 +0000 |
commit | b32f11fc62ef12de1762adf588de6ee6bd4b2bb0 (patch) | |
tree | 6b86fb22b492753a5a4735482ae05a367c41f92e /llvm/lib/CodeGen | |
parent | 6fb3f19959b39cb2f5cff276db55698464c59fa4 (diff) | |
download | bcm5719-llvm-b32f11fc62ef12de1762adf588de6ee6bd4b2bb0.tar.gz bcm5719-llvm-b32f11fc62ef12de1762adf588de6ee6bd4b2bb0.zip |
[MachineCombiner] Support for floating-point FMA on ARM64
Evaluates fmul+fadd -> fmadd combines and similar code sequences in the
machine combiner. It adds support for float and double similar to the existing
integer implementation. The key features are:
- DAGCombiner checks whether it should combine greedily or let the machine
combiner do the evaluation. This is only supported on ARM64.
- It gives preference to throughput over latency: the heuristic used is
to always combine in loops. The target decides whether the machine
combiner should optimize for throughput or latency.
- Support for fmadd, f(n)msub, fmla, fmls patterns
- On by default at -O3 with -ffast-math
llvm-svn: 267098
Diffstat (limited to 'llvm/lib/CodeGen')
-rw-r--r-- | llvm/lib/CodeGen/MachineCombiner.cpp | 12 | ||||
-rw-r--r-- | llvm/lib/CodeGen/SelectionDAG/DAGCombiner.cpp | 13 | ||||
-rw-r--r-- | llvm/lib/CodeGen/TargetInstrInfo.cpp | 6 |
3 files changed, 27 insertions, 4 deletions
diff --git a/llvm/lib/CodeGen/MachineCombiner.cpp b/llvm/lib/CodeGen/MachineCombiner.cpp index 44601d5e462..6b5c6ba8250 100644 --- a/llvm/lib/CodeGen/MachineCombiner.cpp +++ b/llvm/lib/CodeGen/MachineCombiner.cpp @@ -40,6 +40,7 @@ class MachineCombiner : public MachineFunctionPass { const TargetRegisterInfo *TRI; MCSchedModel SchedModel; MachineRegisterInfo *MRI; + MachineLoopInfo *MLI; // Current MachineLoopInfo MachineTraceMetrics *Traces; MachineTraceMetrics::Ensemble *MinInstr; @@ -86,6 +87,7 @@ char &llvm::MachineCombinerID = MachineCombiner::ID; INITIALIZE_PASS_BEGIN(MachineCombiner, "machine-combiner", "Machine InstCombiner", false, false) +INITIALIZE_PASS_DEPENDENCY(MachineLoopInfo) INITIALIZE_PASS_DEPENDENCY(MachineTraceMetrics) INITIALIZE_PASS_END(MachineCombiner, "machine-combiner", "Machine InstCombiner", false, false) @@ -93,6 +95,7 @@ INITIALIZE_PASS_END(MachineCombiner, "machine-combiner", "Machine InstCombiner", void MachineCombiner::getAnalysisUsage(AnalysisUsage &AU) const { AU.setPreservesCFG(); AU.addPreserved<MachineDominatorTree>(); + AU.addRequired<MachineLoopInfo>(); AU.addPreserved<MachineLoopInfo>(); AU.addRequired<MachineTraceMetrics>(); AU.addPreserved<MachineTraceMetrics>(); @@ -354,6 +357,8 @@ bool MachineCombiner::combineInstructions(MachineBasicBlock *MBB) { DEBUG(dbgs() << "Combining MBB " << MBB->getName() << "\n"); auto BlockIter = MBB->begin(); + // Check if the block is in a loop. + const MachineLoop *ML = MLI->getLoopFor(MBB); while (BlockIter != MBB->end()) { auto &MI = *BlockIter++; @@ -406,11 +411,15 @@ bool MachineCombiner::combineInstructions(MachineBasicBlock *MBB) { if (!NewInstCount) continue; + bool SubstituteAlways = false; + if (ML && TII->isThroughputPattern(P)) + SubstituteAlways = true; + // Substitute when we optimize for codesize and the new sequence has // fewer instructions OR // the new sequence neither lengthens the critical path nor increases // resource pressure. 
- if (doSubstitute(NewInstCount, OldInstCount) || + if (SubstituteAlways || doSubstitute(NewInstCount, OldInstCount) || (improvesCriticalPathLen(MBB, &MI, BlockTrace, InsInstrs, InstrIdxForVirtReg, P) && preservesResourceLen(MBB, BlockTrace, InsInstrs, DelInstrs))) { @@ -447,6 +456,7 @@ bool MachineCombiner::runOnMachineFunction(MachineFunction &MF) { SchedModel = STI.getSchedModel(); TSchedModel.init(SchedModel, &STI, TII); MRI = &MF.getRegInfo(); + MLI = &getAnalysis<MachineLoopInfo>(); Traces = &getAnalysis<MachineTraceMetrics>(); MinInstr = nullptr; OptSize = MF.getFunction()->optForSize(); diff --git a/llvm/lib/CodeGen/SelectionDAG/DAGCombiner.cpp b/llvm/lib/CodeGen/SelectionDAG/DAGCombiner.cpp index 6c9800824ef..ff3fee2b4cb 100644 --- a/llvm/lib/CodeGen/SelectionDAG/DAGCombiner.cpp +++ b/llvm/lib/CodeGen/SelectionDAG/DAGCombiner.cpp @@ -24,6 +24,7 @@ #include "llvm/Analysis/AliasAnalysis.h" #include "llvm/CodeGen/MachineFrameInfo.h" #include "llvm/CodeGen/MachineFunction.h" +#include "llvm/CodeGen/SelectionDAGTargetInfo.h" #include "llvm/IR/DataLayout.h" #include "llvm/IR/DerivedTypes.h" #include "llvm/IR/Function.h" @@ -85,6 +86,7 @@ namespace { class DAGCombiner { SelectionDAG &DAG; + const SelectionDAGTargetInfo &STI; const TargetLowering &TLI; CombineLevel Level; CodeGenOpt::Level OptLevel; @@ -469,8 +471,9 @@ namespace { public: DAGCombiner(SelectionDAG &D, AliasAnalysis &A, CodeGenOpt::Level OL) - : DAG(D), TLI(D.getTargetLoweringInfo()), Level(BeforeLegalizeTypes), - OptLevel(OL), LegalOperations(false), LegalTypes(false), AA(A) { + : DAG(D), STI(D.getSelectionDAGInfo()), TLI(D.getTargetLoweringInfo()), + Level(BeforeLegalizeTypes), OptLevel(OL), LegalOperations(false), + LegalTypes(false), AA(A) { ForCodeSize = DAG.getMachineFunction().getFunction()->optForSize(); } @@ -7715,6 +7718,9 @@ SDValue DAGCombiner::visitFADDForFMACombine(SDNode *N) { if (!HasFMAD && !HasFMA) return SDValue(); + if (AllowFusion && STI.GenerateFMAsInMachineCombiner(OptLevel)) 
+ return SDValue(); + // Always prefer FMAD to FMA for precision. unsigned PreferredFusedOpcode = HasFMAD ? ISD::FMAD : ISD::FMA; bool Aggressive = TLI.enableAggressiveFMAFusion(VT); @@ -7898,6 +7904,9 @@ SDValue DAGCombiner::visitFSUBForFMACombine(SDNode *N) { if (!HasFMAD && !HasFMA) return SDValue(); + if (AllowFusion && STI.GenerateFMAsInMachineCombiner(OptLevel)) + return SDValue(); + // Always prefer FMAD to FMA for precision. unsigned PreferredFusedOpcode = HasFMAD ? ISD::FMAD : ISD::FMA; bool Aggressive = TLI.enableAggressiveFMAFusion(VT); diff --git a/llvm/lib/CodeGen/TargetInstrInfo.cpp b/llvm/lib/CodeGen/TargetInstrInfo.cpp index 86517d9afbc..800ad6d1bb4 100644 --- a/llvm/lib/CodeGen/TargetInstrInfo.cpp +++ b/llvm/lib/CodeGen/TargetInstrInfo.cpp @@ -655,7 +655,11 @@ bool TargetInstrInfo::getMachineCombinerPatterns( return false; } - +/// Return true when a code sequence can improve loop throughput. +bool +TargetInstrInfo::isThroughputPattern(MachineCombinerPattern Pattern) const { + return false; +} /// Attempt the reassociation transformation to reduce critical path length. /// See the above comments before getMachineCombinerPatterns(). void TargetInstrInfo::reassociateOps( |