summaryrefslogtreecommitdiffstats
path: root/llvm/lib/CodeGen
diff options
context:
space:
mode:
authorGerolf Hoflehner <ghoflehner@apple.com>2016-04-24 05:14:01 +0000
committerGerolf Hoflehner <ghoflehner@apple.com>2016-04-24 05:14:01 +0000
commit01b3a6184aae0b5ef1e2cccd4bb8da8d6a0df68a (patch)
tree935cd91c560d5ade3b118bd4ede0ec519bc26601 /llvm/lib/CodeGen
parent9cf3bf659c7f2e447517115443bc1efb1693b541 (diff)
downloadbcm5719-llvm-01b3a6184aae0b5ef1e2cccd4bb8da8d6a0df68a.tar.gz
bcm5719-llvm-01b3a6184aae0b5ef1e2cccd4bb8da8d6a0df68a.zip
[MachineCombiner] Support for floating-point FMA on ARM64 (re-commit r267098)
The original patch caused crashes because it could dereference a null pointer for SelectionDAGTargetInfo for targets that do not define it. Evaluates fmul+fadd -> fmadd combines and similar code sequences in the machine combiner. It adds support for float and double similar to the existing integer implementation. The key features are: - DAGCombiner checks whether it should combine greedily or let the machine combiner do the evaluation. This is only supported on ARM64. - It gives preference to throughput over latency: the heuristic used is to always combine in loops. The target decides whether the machine combiner should optimize for throughput or latency. - Support for fmadd, f(n)msub, fmla, fmls patterns - On by default at O3 ffast-math llvm-svn: 267328
Diffstat (limited to 'llvm/lib/CodeGen')
-rw-r--r--llvm/lib/CodeGen/MachineCombiner.cpp12
-rw-r--r--llvm/lib/CodeGen/SelectionDAG/DAGCombiner.cpp11
-rw-r--r--llvm/lib/CodeGen/TargetInstrInfo.cpp6
3 files changed, 26 insertions, 3 deletions
diff --git a/llvm/lib/CodeGen/MachineCombiner.cpp b/llvm/lib/CodeGen/MachineCombiner.cpp
index 44601d5e462..6b5c6ba8250 100644
--- a/llvm/lib/CodeGen/MachineCombiner.cpp
+++ b/llvm/lib/CodeGen/MachineCombiner.cpp
@@ -40,6 +40,7 @@ class MachineCombiner : public MachineFunctionPass {
const TargetRegisterInfo *TRI;
MCSchedModel SchedModel;
MachineRegisterInfo *MRI;
+ MachineLoopInfo *MLI; // Current MachineLoopInfo
MachineTraceMetrics *Traces;
MachineTraceMetrics::Ensemble *MinInstr;
@@ -86,6 +87,7 @@ char &llvm::MachineCombinerID = MachineCombiner::ID;
INITIALIZE_PASS_BEGIN(MachineCombiner, "machine-combiner",
"Machine InstCombiner", false, false)
+INITIALIZE_PASS_DEPENDENCY(MachineLoopInfo)
INITIALIZE_PASS_DEPENDENCY(MachineTraceMetrics)
INITIALIZE_PASS_END(MachineCombiner, "machine-combiner", "Machine InstCombiner",
false, false)
@@ -93,6 +95,7 @@ INITIALIZE_PASS_END(MachineCombiner, "machine-combiner", "Machine InstCombiner",
void MachineCombiner::getAnalysisUsage(AnalysisUsage &AU) const {
AU.setPreservesCFG();
AU.addPreserved<MachineDominatorTree>();
+ AU.addRequired<MachineLoopInfo>();
AU.addPreserved<MachineLoopInfo>();
AU.addRequired<MachineTraceMetrics>();
AU.addPreserved<MachineTraceMetrics>();
@@ -354,6 +357,8 @@ bool MachineCombiner::combineInstructions(MachineBasicBlock *MBB) {
DEBUG(dbgs() << "Combining MBB " << MBB->getName() << "\n");
auto BlockIter = MBB->begin();
+ // Check if the block is in a loop.
+ const MachineLoop *ML = MLI->getLoopFor(MBB);
while (BlockIter != MBB->end()) {
auto &MI = *BlockIter++;
@@ -406,11 +411,15 @@ bool MachineCombiner::combineInstructions(MachineBasicBlock *MBB) {
if (!NewInstCount)
continue;
+ bool SubstituteAlways = false;
+ if (ML && TII->isThroughputPattern(P))
+ SubstituteAlways = true;
+
// Substitute when we optimize for codesize and the new sequence has
// fewer instructions OR
// the new sequence neither lengthens the critical path nor increases
// resource pressure.
- if (doSubstitute(NewInstCount, OldInstCount) ||
+ if (SubstituteAlways || doSubstitute(NewInstCount, OldInstCount) ||
(improvesCriticalPathLen(MBB, &MI, BlockTrace, InsInstrs,
InstrIdxForVirtReg, P) &&
preservesResourceLen(MBB, BlockTrace, InsInstrs, DelInstrs))) {
@@ -447,6 +456,7 @@ bool MachineCombiner::runOnMachineFunction(MachineFunction &MF) {
SchedModel = STI.getSchedModel();
TSchedModel.init(SchedModel, &STI, TII);
MRI = &MF.getRegInfo();
+ MLI = &getAnalysis<MachineLoopInfo>();
Traces = &getAnalysis<MachineTraceMetrics>();
MinInstr = nullptr;
OptSize = MF.getFunction()->optForSize();
diff --git a/llvm/lib/CodeGen/SelectionDAG/DAGCombiner.cpp b/llvm/lib/CodeGen/SelectionDAG/DAGCombiner.cpp
index 703d33bff17..f740e59af96 100644
--- a/llvm/lib/CodeGen/SelectionDAG/DAGCombiner.cpp
+++ b/llvm/lib/CodeGen/SelectionDAG/DAGCombiner.cpp
@@ -24,6 +24,7 @@
#include "llvm/Analysis/AliasAnalysis.h"
#include "llvm/CodeGen/MachineFrameInfo.h"
#include "llvm/CodeGen/MachineFunction.h"
+#include "llvm/CodeGen/SelectionDAGTargetInfo.h"
#include "llvm/IR/DataLayout.h"
#include "llvm/IR/DerivedTypes.h"
#include "llvm/IR/Function.h"
@@ -7716,6 +7717,11 @@ SDValue DAGCombiner::visitFADDForFMACombine(SDNode *N) {
if (!HasFMAD && !HasFMA)
return SDValue();
+ const SelectionDAGTargetInfo *STI = DAG.getSubtarget().getSelectionDAGInfo();
+ ;
+ if (AllowFusion && STI && STI->GenerateFMAsInMachineCombiner(OptLevel))
+ return SDValue();
+
// Always prefer FMAD to FMA for precision.
unsigned PreferredFusedOpcode = HasFMAD ? ISD::FMAD : ISD::FMA;
bool Aggressive = TLI.enableAggressiveFMAFusion(VT);
@@ -7899,6 +7905,10 @@ SDValue DAGCombiner::visitFSUBForFMACombine(SDNode *N) {
if (!HasFMAD && !HasFMA)
return SDValue();
+ const SelectionDAGTargetInfo *STI = DAG.getSubtarget().getSelectionDAGInfo();
+ if (AllowFusion && STI && STI->GenerateFMAsInMachineCombiner(OptLevel))
+ return SDValue();
+
// Always prefer FMAD to FMA for precision.
unsigned PreferredFusedOpcode = HasFMAD ? ISD::FMAD : ISD::FMA;
bool Aggressive = TLI.enableAggressiveFMAFusion(VT);
@@ -8367,7 +8377,6 @@ SDValue DAGCombiner::visitFADD(SDNode *N) {
AddToWorklist(Fused.getNode());
return Fused;
}
-
return SDValue();
}
diff --git a/llvm/lib/CodeGen/TargetInstrInfo.cpp b/llvm/lib/CodeGen/TargetInstrInfo.cpp
index 86517d9afbc..800ad6d1bb4 100644
--- a/llvm/lib/CodeGen/TargetInstrInfo.cpp
+++ b/llvm/lib/CodeGen/TargetInstrInfo.cpp
@@ -655,7 +655,11 @@ bool TargetInstrInfo::getMachineCombinerPatterns(
return false;
}
-
+/// Return true when a code sequence can improve loop throughput.
+bool
+TargetInstrInfo::isThroughputPattern(MachineCombinerPattern Pattern) const {
+ return false;
+}
/// Attempt the reassociation transformation to reduce critical path length.
/// See the above comments before getMachineCombinerPatterns().
void TargetInstrInfo::reassociateOps(
OpenPOWER on IntegriCloud