summary | refs | log | tree | commit | diff | stats
path: root/llvm/lib/CodeGen
diff options
context:
space:
mode:
author: Bob Wilson <bob.wilson@apple.com> 2010-01-15 06:29:17 +0000
committer: Bob Wilson <bob.wilson@apple.com> 2010-01-15 06:29:17 +0000
commit: 1a234c0aeeb32aa71a787b496ef2e2d37fd563dd (patch)
tree: e76b66f6c4fb8a9610444f9fd80f9550a6fc4929 /llvm/lib/CodeGen
parent: 5d6551816baaf08a508c2064d2134742793c1b68 (diff)
download: bcm5719-llvm-1a234c0aeeb32aa71a787b496ef2e2d37fd563dd.tar.gz
download: bcm5719-llvm-1a234c0aeeb32aa71a787b496ef2e2d37fd563dd.zip
Change pre-regalloc tail duplication to only duplicate indirect branch blocks.
The pre-regalloc pass caused some regressions in both compile time and performance of the generated code, and it did not improve performance, except for indirect branches. I also moved the check for single-block loops to speed up the common case when running the taildup pass before reg allocation.

llvm-svn: 93505
Diffstat (limited to 'llvm/lib/CodeGen')
-rw-r--r-- llvm/lib/CodeGen/TailDuplication.cpp | 15
1 files changed, 11 insertions, 4 deletions
diff --git a/llvm/lib/CodeGen/TailDuplication.cpp b/llvm/lib/CodeGen/TailDuplication.cpp
index c99c74c0f4d..f51f74d5065 100644
--- a/llvm/lib/CodeGen/TailDuplication.cpp
+++ b/llvm/lib/CodeGen/TailDuplication.cpp
@@ -253,7 +253,7 @@ bool TailDuplicatePass::TailDuplicateBlocks(MachineFunction &MF) {
SSAUpdateVals.clear();
}
- // Eliminate some of the copies inserted tail duplication to maintain
+ // Eliminate some of the copies inserted by tail duplication to maintain
// SSA form.
for (unsigned i = 0, e = Copies.size(); i != e; ++i) {
MachineInstr *Copy = Copies[i];
@@ -437,8 +437,11 @@ bool
TailDuplicatePass::TailDuplicate(MachineBasicBlock *TailBB, MachineFunction &MF,
SmallVector<MachineBasicBlock*, 8> &TDBBs,
SmallVector<MachineInstr*, 16> &Copies) {
- // Don't try to tail-duplicate single-block loops.
- if (TailBB->isSuccessor(TailBB))
+ // Pre-regalloc tail duplication hurts compile time and doesn't help
+ // much except for indirect branches.
+ bool hasIndirectBranch = (!TailBB->empty() &&
+ TailBB->back().getDesc().isIndirectBranch());
+ if (PreRegAlloc && !hasIndirectBranch)
return false;
// Set the limit on the number of instructions to duplicate, with a default
@@ -446,7 +449,7 @@ TailDuplicatePass::TailDuplicate(MachineBasicBlock *TailBB, MachineFunction &MF,
// duplicate only one, because one branch instruction can be eliminated to
// compensate for the duplication.
unsigned MaxDuplicateCount;
- if (!TailBB->empty() && TailBB->back().getDesc().isIndirectBranch())
+ if (hasIndirectBranch)
// If the target has hardware branch prediction that can handle indirect
// branches, duplicating them can often make them predictable when there
// are common paths through the code. The limit needs to be high enough
@@ -457,6 +460,10 @@ TailDuplicatePass::TailDuplicate(MachineBasicBlock *TailBB, MachineFunction &MF,
else
MaxDuplicateCount = TailDuplicateSize;
+ // Don't try to tail-duplicate single-block loops.
+ if (TailBB->isSuccessor(TailBB))
+ return false;
+
// Check the instructions in the block to determine whether tail-duplication
// is invalid or unlikely to be profitable.
unsigned InstrCount = 0;
OpenPOWER on IntegriCloud