summaryrefslogtreecommitdiffstats
diff options
context:
space:
mode:
authorShuxin Yang <shuxin.llvm@gmail.com>2013-06-04 01:00:57 +0000
committerShuxin Yang <shuxin.llvm@gmail.com>2013-06-04 01:00:57 +0000
commit8b8fd2171c19b82b5807c3eaed2ce675b9c36258 (patch)
tree1c576a66487ec34fa2d1bad0bee72a5c5738728f
parent6df859d85954df08e201fd4506f19b20c37d49e6 (diff)
downloadbcm5719-llvm-8b8fd2171c19b82b5807c3eaed2ce675b9c36258.tar.gz
bcm5719-llvm-8b8fd2171c19b82b5807c3eaed2ce675b9c36258.zip
Fix a defect in code-layout pass, improving Benchmarks/Olden/em3d/em3d by about 30%
(4.58s vs 3.2s on an oldish Mac Tower). The corresponding src is excerpted bellow. The lopp accounts for about 90% of execution time. -------------------- cat -n test-suite/MultiSource/Benchmarks/Olden/em3d/make_graph.c 90 91 for (k=0; k<j; k++) 92 if (other_node == cur_node->to_nodes[k]) break; The defective layout is sketched bellow, where the two branches need to swap. ------------------------------------------------------------------------ L: ... if (cond) goto out-of-loop goto L While this code sequence is defective, I don't understand why it incurs 1/3 of execution time. CPU-event-profiling indicates the poor laoyout dose not increase in br-misprediction; it dosen't increase stall cycle at all, and it dosen't prevent the CPU detect the loop (i.e. Loop-Stream-Detector seems to be working fine as well)... The root cause of the problem is that the layout pass calls AnalyzeBranch() with basic-block which is not updated to reflect its current layout. rdar://13966341 llvm-svn: 183174
-rw-r--r--llvm/lib/CodeGen/MachineBlockPlacement.cpp26
1 files changed, 25 insertions, 1 deletions
diff --git a/llvm/lib/CodeGen/MachineBlockPlacement.cpp b/llvm/lib/CodeGen/MachineBlockPlacement.cpp
index bfba503b351..4b0f7f38fcc 100644
--- a/llvm/lib/CodeGen/MachineBlockPlacement.cpp
+++ b/llvm/lib/CodeGen/MachineBlockPlacement.cpp
@@ -991,6 +991,28 @@ void MachineBlockPlacement::buildCFGChains(MachineFunction &F) {
Cond.clear();
MachineBasicBlock *TBB = 0, *FBB = 0; // For AnalyzeBranch.
if (!TII->AnalyzeBranch(*PrevBB, TBB, FBB, Cond)) {
+ // The "PrevBB" is not yet updated to reflect current code layout, so,
+ // o. it may fall-through to a block without explict "goto" instruction
+ // before layout, and no longer fall-through it after layout; or
+ // o. just opposite.
+ //
+ // AnalyzeBranch() may return erroneous value for FBB when these two
+ // situations take place. For the first scenario FBB is mistakenly set
+ // NULL; for the 2nd scenario, the FBB, which is expected to be NULL,
+ // is mistakenly pointing to "*BI".
+ //
+ bool needUpdateBr = true;
+ if (!Cond.empty() && (!FBB || FBB == *BI)) {
+ PrevBB->updateTerminator();
+ needUpdateBr = false;
+ Cond.clear();
+ TBB = FBB = 0;
+ if (TII->AnalyzeBranch(*PrevBB, TBB, FBB, Cond)) {
+ // FIXME: This should never take place.
+ TBB = FBB = 0;
+ }
+ }
+
// If PrevBB has a two-way branch, try to re-order the branches
// such that we branch to the successor with higher weight first.
if (TBB && !Cond.empty() && FBB &&
@@ -1003,8 +1025,10 @@ void MachineBlockPlacement::buildCFGChains(MachineFunction &F) {
DebugLoc dl; // FIXME: this is nowhere
TII->RemoveBranch(*PrevBB);
TII->InsertBranch(*PrevBB, FBB, TBB, Cond, dl);
+ needUpdateBr = true;
}
- PrevBB->updateTerminator();
+ if (needUpdateBr)
+ PrevBB->updateTerminator();
}
}
OpenPOWER on IntegriCloud