diff options
author | John Brawn <john.brawn@arm.com> | 2017-06-28 14:11:15 +0000 |
---|---|---|
committer | John Brawn <john.brawn@arm.com> | 2017-06-28 14:11:15 +0000 |
commit | 75d76e5e956c75fc524253d57153e40836d3e6d5 (patch) | |
tree | 67844a7a4f3db463b3d3abbbb476d2e64c04c14a /llvm/lib/Target/ARM/ARMBaseInstrInfo.cpp | |
parent | 48b30c3d55b450f841b222394c840da12f0329a2 (diff) | |
download | bcm5719-llvm-75d76e5e956c75fc524253d57153e40836d3e6d5.tar.gz bcm5719-llvm-75d76e5e956c75fc524253d57153e40836d3e6d5.zip |
[ARM] Improve if-conversion for M-class CPUs without branch predictors
The current heuristic in isProfitableToIfCvt assumes we have a branch predictor,
and so gives the wrong answer in some cases when we don't. This patch adds a
subtarget feature to indicate that a subtarget has no branch predictor, and
changes the heuristic in isProfitableToiIfCvt when it's present. This gives a
slight overall improvement in a set of embedded benchmarks on Cortex-M4 and
Cortex-M33.
Differential Revision: https://reviews.llvm.org/D34398
llvm-svn: 306547
Diffstat (limited to 'llvm/lib/Target/ARM/ARMBaseInstrInfo.cpp')
-rw-r--r-- | llvm/lib/Target/ARM/ARMBaseInstrInfo.cpp | 45 |
1 files changed, 37 insertions, 8 deletions
diff --git a/llvm/lib/Target/ARM/ARMBaseInstrInfo.cpp b/llvm/lib/Target/ARM/ARMBaseInstrInfo.cpp index e0810c358f2..1ec6b24b2ed 100644 --- a/llvm/lib/Target/ARM/ARMBaseInstrInfo.cpp +++ b/llvm/lib/Target/ARM/ARMBaseInstrInfo.cpp @@ -1851,9 +1851,9 @@ isProfitableToIfCvt(MachineBasicBlock &MBB, } bool ARMBaseInstrInfo:: -isProfitableToIfCvt(MachineBasicBlock &, +isProfitableToIfCvt(MachineBasicBlock &TBB, unsigned TCycles, unsigned TExtra, - MachineBasicBlock &, + MachineBasicBlock &FBB, unsigned FCycles, unsigned FExtra, BranchProbability Probability) const { if (!TCycles) @@ -1863,14 +1863,43 @@ isProfitableToIfCvt(MachineBasicBlock &, // Here we scale up each component of UnpredCost to avoid precision issue when // scaling TCycles/FCycles by Probability. const unsigned ScalingUpFactor = 1024; - unsigned TUnpredCost = Probability.scale(TCycles * ScalingUpFactor); - unsigned FUnpredCost = + + unsigned PredCost = (TCycles + FCycles + TExtra + FExtra) * ScalingUpFactor; + unsigned UnpredCost; + if (!Subtarget.hasBranchPredictor()) { + // When we don't have a branch predictor it's always cheaper to not take a + // branch than take it, so we have to take that into account. + unsigned NotTakenBranchCost = 1; + unsigned TakenBranchCost = Subtarget.getMispredictionPenalty(); + unsigned TUnpredCycles, FUnpredCycles; + if (!FCycles) { + // Triangle: TBB is the fallthrough + TUnpredCycles = TCycles + NotTakenBranchCost; + FUnpredCycles = TakenBranchCost; + } else { + // Diamond: TBB is the block that is branched to, FBB is the fallthrough + TUnpredCycles = TCycles + TakenBranchCost; + FUnpredCycles = FCycles + NotTakenBranchCost; + } + // The total cost is the cost of each path scaled by their probabilites + unsigned TUnpredCost = Probability.scale(TUnpredCycles * ScalingUpFactor); + unsigned FUnpredCost = Probability.getCompl().scale(FUnpredCycles * ScalingUpFactor); + UnpredCost = TUnpredCost + FUnpredCost; + // When predicating assume that the first IT can be folded away but later + // ones cost one cycle each + if (Subtarget.isThumb2() && TCycles + FCycles > 4) { + PredCost += ((TCycles + FCycles - 4) / 4) * ScalingUpFactor; + } + } else { + unsigned TUnpredCost = Probability.scale(TCycles * ScalingUpFactor); + unsigned FUnpredCost = Probability.getCompl().scale(FCycles * ScalingUpFactor); - unsigned UnpredCost = TUnpredCost + FUnpredCost; - UnpredCost += 1 * ScalingUpFactor; // The branch itself - UnpredCost += Subtarget.getMispredictionPenalty() * ScalingUpFactor / 10; + UnpredCost = TUnpredCost + FUnpredCost; + UnpredCost += 1 * ScalingUpFactor; // The branch itself + UnpredCost += Subtarget.getMispredictionPenalty() * ScalingUpFactor / 10; + } - return (TCycles + FCycles + TExtra + FExtra) * ScalingUpFactor <= UnpredCost; + return PredCost <= UnpredCost; } bool |