[MachineCombiner] Support for floating-point FMA on ARM64 (re-commit r267098)

The original patch caused crashes because it could derefence a null pointer for SelectionDAGTargetInfo for targets that do not define it. Evaluates fmul+fadd -> fmadd combines and similar code sequences in the machine combiner. It adds support for float and double similar to the existing integer implementation. The key features are: - DAGCombiner checks whether it should combine greedily or let the machine combiner do the evaluation. This is only supported on ARM64. - It gives preference to throughput over latency: the heuristic used is to combine always in loops. The targets decides whether the machine combiner should optimize for throughput or latency. - Supports for fmadd, f(n)msub, fmla, fmls patterns - On by default at O3 ffast-math llvm-svn: 267328
author: Gerolf Hoflehner <ghoflehner@apple.com> 2016-04-24 05:14:01 +0000
committer: Gerolf Hoflehner <ghoflehner@apple.com> 2016-04-24 05:14:01 +0000
commit: 01b3a6184aae0b5ef1e2cccd4bb8da8d6a0df68a (patch)
tree: 935cd91c560d5ade3b118bd4ede0ec519bc26601 /llvm/lib/CodeGen/MachineCombiner.cpp
parent: 9cf3bf659c7f2e447517115443bc1efb1693b541 (diff)
download: bcm5719-llvm-01b3a6184aae0b5ef1e2cccd4bb8da8d6a0df68a.tar.gz
bcm5719-llvm-01b3a6184aae0b5ef1e2cccd4bb8da8d6a0df68a.zip
1 files changed, 11 insertions, 1 deletions
diff --git a/llvm/lib/CodeGen/MachineCombiner.cpp b/llvm/lib/CodeGen/MachineCombiner.cpp
index 44601d5e462..6b5c6ba8250 100644
--- a/llvm/lib/CodeGen/MachineCombiner.cpp
+++ b/llvm/lib/CodeGen/MachineCombiner.cpp
@@ -40,6 +40,7 @@ class MachineCombiner : public MachineFunctionPass {
   const TargetRegisterInfo *TRI;
   MCSchedModel SchedModel;
   MachineRegisterInfo *MRI;
+  MachineLoopInfo *MLI; // Current MachineLoopInfo
   MachineTraceMetrics *Traces;
   MachineTraceMetrics::Ensemble *MinInstr;
 
@@ -86,6 +87,7 @@ char &llvm::MachineCombinerID = MachineCombiner::ID;
 
 INITIALIZE_PASS_BEGIN(MachineCombiner, "machine-combiner",
                       "Machine InstCombiner", false, false)
+INITIALIZE_PASS_DEPENDENCY(MachineLoopInfo)
 INITIALIZE_PASS_DEPENDENCY(MachineTraceMetrics)
 INITIALIZE_PASS_END(MachineCombiner, "machine-combiner", "Machine InstCombiner",
                     false, false)
@@ -93,6 +95,7 @@ INITIALIZE_PASS_END(MachineCombiner, "machine-combiner", "Machine InstCombiner",
 void MachineCombiner::getAnalysisUsage(AnalysisUsage &AU) const {
   AU.setPreservesCFG();
   AU.addPreserved<MachineDominatorTree>();
+  AU.addRequired<MachineLoopInfo>();
   AU.addPreserved<MachineLoopInfo>();
   AU.addRequired<MachineTraceMetrics>();
   AU.addPreserved<MachineTraceMetrics>();
@@ -354,6 +357,8 @@ bool MachineCombiner::combineInstructions(MachineBasicBlock *MBB) {
   DEBUG(dbgs() << "Combining MBB " << MBB->getName() << "\n");
 
   auto BlockIter = MBB->begin();
+  // Check if the block is in a loop.
+  const MachineLoop *ML = MLI->getLoopFor(MBB);
 
   while (BlockIter != MBB->end()) {
     auto &MI = *BlockIter++;
@@ -406,11 +411,15 @@ bool MachineCombiner::combineInstructions(MachineBasicBlock *MBB) {
       if (!NewInstCount)
         continue;
 
+      bool SubstituteAlways = false;
+      if (ML && TII->isThroughputPattern(P))
+        SubstituteAlways = true;
+
       // Substitute when we optimize for codesize and the new sequence has
       // fewer instructions OR
       // the new sequence neither lengthens the critical path nor increases
       // resource pressure.
-      if (doSubstitute(NewInstCount, OldInstCount) ||
+      if (SubstituteAlways || doSubstitute(NewInstCount, OldInstCount) ||
           (improvesCriticalPathLen(MBB, &MI, BlockTrace, InsInstrs,
                                    InstrIdxForVirtReg, P) &&
            preservesResourceLen(MBB, BlockTrace, InsInstrs, DelInstrs))) {
@@ -447,6 +456,7 @@ bool MachineCombiner::runOnMachineFunction(MachineFunction &MF) {
   SchedModel = STI.getSchedModel();
   TSchedModel.init(SchedModel, &STI, TII);
   MRI = &MF.getRegInfo();
+  MLI = &getAnalysis<MachineLoopInfo>();
   Traces = &getAnalysis<MachineTraceMetrics>();
   MinInstr = nullptr;
   OptSize = MF.getFunction()->optForSize();
author	Gerolf Hoflehner <ghoflehner@apple.com>	2016-04-24 05:14:01 +0000
committer	Gerolf Hoflehner <ghoflehner@apple.com>	2016-04-24 05:14:01 +0000
commit	01b3a6184aae0b5ef1e2cccd4bb8da8d6a0df68a (patch)
tree	935cd91c560d5ade3b118bd4ede0ec519bc26601 /llvm/lib/CodeGen/MachineCombiner.cpp
parent	9cf3bf659c7f2e447517115443bc1efb1693b541 (diff)
download	bcm5719-llvm-01b3a6184aae0b5ef1e2cccd4bb8da8d6a0df68a.tar.gz bcm5719-llvm-01b3a6184aae0b5ef1e2cccd4bb8da8d6a0df68a.zip