summaryrefslogtreecommitdiffstats
path: root/llvm/lib/Target/ARM/ARMBaseInstrInfo.cpp
diff options
context:
space:
mode:
authorJohn Brawn <john.brawn@arm.com>2017-06-28 14:11:15 +0000
committerJohn Brawn <john.brawn@arm.com>2017-06-28 14:11:15 +0000
commit75d76e5e956c75fc524253d57153e40836d3e6d5 (patch)
tree67844a7a4f3db463b3d3abbbb476d2e64c04c14a /llvm/lib/Target/ARM/ARMBaseInstrInfo.cpp
parent48b30c3d55b450f841b222394c840da12f0329a2 (diff)
downloadbcm5719-llvm-75d76e5e956c75fc524253d57153e40836d3e6d5.tar.gz
bcm5719-llvm-75d76e5e956c75fc524253d57153e40836d3e6d5.zip
[ARM] Improve if-conversion for M-class CPUs without branch predictors
The current heuristic in isProfitableToIfCvt assumes we have a branch predictor, and so gives the wrong answer in some cases when we don't. This patch adds a subtarget feature to indicate that a subtarget has no branch predictor, and changes the heuristic in isProfitableToiIfCvt when it's present. This gives a slight overall improvement in a set of embedded benchmarks on Cortex-M4 and Cortex-M33. Differential Revision: https://reviews.llvm.org/D34398 llvm-svn: 306547
Diffstat (limited to 'llvm/lib/Target/ARM/ARMBaseInstrInfo.cpp')
-rw-r--r--llvm/lib/Target/ARM/ARMBaseInstrInfo.cpp45
1 files changed, 37 insertions, 8 deletions
diff --git a/llvm/lib/Target/ARM/ARMBaseInstrInfo.cpp b/llvm/lib/Target/ARM/ARMBaseInstrInfo.cpp
index e0810c358f2..1ec6b24b2ed 100644
--- a/llvm/lib/Target/ARM/ARMBaseInstrInfo.cpp
+++ b/llvm/lib/Target/ARM/ARMBaseInstrInfo.cpp
@@ -1851,9 +1851,9 @@ isProfitableToIfCvt(MachineBasicBlock &MBB,
}
bool ARMBaseInstrInfo::
-isProfitableToIfCvt(MachineBasicBlock &,
+isProfitableToIfCvt(MachineBasicBlock &TBB,
unsigned TCycles, unsigned TExtra,
- MachineBasicBlock &,
+ MachineBasicBlock &FBB,
unsigned FCycles, unsigned FExtra,
BranchProbability Probability) const {
if (!TCycles)
@@ -1863,14 +1863,43 @@ isProfitableToIfCvt(MachineBasicBlock &,
// Here we scale up each component of UnpredCost to avoid precision issue when
// scaling TCycles/FCycles by Probability.
const unsigned ScalingUpFactor = 1024;
- unsigned TUnpredCost = Probability.scale(TCycles * ScalingUpFactor);
- unsigned FUnpredCost =
+
+ unsigned PredCost = (TCycles + FCycles + TExtra + FExtra) * ScalingUpFactor;
+ unsigned UnpredCost;
+ if (!Subtarget.hasBranchPredictor()) {
+ // When we don't have a branch predictor it's always cheaper to not take a
+ // branch than take it, so we have to take that into account.
+ unsigned NotTakenBranchCost = 1;
+ unsigned TakenBranchCost = Subtarget.getMispredictionPenalty();
+ unsigned TUnpredCycles, FUnpredCycles;
+ if (!FCycles) {
+ // Triangle: TBB is the fallthrough
+ TUnpredCycles = TCycles + NotTakenBranchCost;
+ FUnpredCycles = TakenBranchCost;
+ } else {
+ // Diamond: TBB is the block that is branched to, FBB is the fallthrough
+ TUnpredCycles = TCycles + TakenBranchCost;
+ FUnpredCycles = FCycles + NotTakenBranchCost;
+ }
+ // The total cost is the cost of each path scaled by their probabilites
+ unsigned TUnpredCost = Probability.scale(TUnpredCycles * ScalingUpFactor);
+ unsigned FUnpredCost = Probability.getCompl().scale(FUnpredCycles * ScalingUpFactor);
+ UnpredCost = TUnpredCost + FUnpredCost;
+ // When predicating assume that the first IT can be folded away but later
+ // ones cost one cycle each
+ if (Subtarget.isThumb2() && TCycles + FCycles > 4) {
+ PredCost += ((TCycles + FCycles - 4) / 4) * ScalingUpFactor;
+ }
+ } else {
+ unsigned TUnpredCost = Probability.scale(TCycles * ScalingUpFactor);
+ unsigned FUnpredCost =
Probability.getCompl().scale(FCycles * ScalingUpFactor);
- unsigned UnpredCost = TUnpredCost + FUnpredCost;
- UnpredCost += 1 * ScalingUpFactor; // The branch itself
- UnpredCost += Subtarget.getMispredictionPenalty() * ScalingUpFactor / 10;
+ UnpredCost = TUnpredCost + FUnpredCost;
+ UnpredCost += 1 * ScalingUpFactor; // The branch itself
+ UnpredCost += Subtarget.getMispredictionPenalty() * ScalingUpFactor / 10;
+ }
- return (TCycles + FCycles + TExtra + FExtra) * ScalingUpFactor <= UnpredCost;
+ return PredCost <= UnpredCost;
}
bool
OpenPOWER on IntegriCloud