summaryrefslogtreecommitdiffstats
path: root/llvm/test/CodeGen
diff options
context:
space:
mode:
authorMichael Kuperstein <mkuper@google.com>2017-02-24 18:41:32 +0000
committerMichael Kuperstein <mkuper@google.com>2017-02-24 18:41:32 +0000
commit46b131e3f85b448151b717f1d240e0a28a10f579 (patch)
tree51985b23bf9e4331e2b9de64f1fb76d3b849f5fd /llvm/test/CodeGen
parentcdf2bd656a68b614a2224d69072f43690b5141d2 (diff)
downloadbcm5719-llvm-46b131e3f85b448151b717f1d240e0a28a10f579.tar.gz
bcm5719-llvm-46b131e3f85b448151b717f1d240e0a28a10f579.zip
[CGP] Split some critical edges coming out of indirect branches
Splitting critical edges when one of the source edges is an indirectbr is hard in general (because it requires changing the memory the indirectbr reads). But if a block only has a single indirectbr predecessor (which is the common case), we can simulate splitting that edge by splitting the destination block, and retargeting the *direct* branches. This is motivated by the use of computed gotos in python 2.7: PyEval_EvalFrame() ends up using an indirect branch with ~100 successors, and passing a constant to each of those. Since MachineSink can't break indirect critical edges on demand (and doing this in MIR doesn't look feasible), this causes us to emit about ~100 defs of registers containing constants, which we in the predecessor block, where only one of those constants is used in each successor. So, at each computed goto, we needlessly spill about a 100 constants to stack. The end result is that a clang-compiled python interpreter can be about ~2.5x slower on a simple python reduction loop than a gcc-compiled interpreter. Differential Revision: https://reviews.llvm.org/D29916 llvm-svn: 296149
Diffstat (limited to 'llvm/test/CodeGen')
-rw-r--r--llvm/test/CodeGen/ARM/indirectbr.ll1
-rw-r--r--llvm/test/CodeGen/MSP430/indirectbr2.ll2
-rw-r--r--llvm/test/CodeGen/PowerPC/indirectbr.ll36
3 files changed, 26 insertions, 13 deletions
diff --git a/llvm/test/CodeGen/ARM/indirectbr.ll b/llvm/test/CodeGen/ARM/indirectbr.ll
index d15ef14b449..90defad43a7 100644
--- a/llvm/test/CodeGen/ARM/indirectbr.ll
+++ b/llvm/test/CodeGen/ARM/indirectbr.ll
@@ -47,6 +47,7 @@ L3: ; preds = %L4, %bb2
br label %L2
L2: ; preds = %L3, %bb2
+; THUMB-LABEL: %L1.clone
; THUMB: muls
%res.2 = phi i32 [ %res.1, %L3 ], [ 1, %bb2 ] ; <i32> [#uses=1]
%phitmp = mul i32 %res.2, 6 ; <i32> [#uses=1]
diff --git a/llvm/test/CodeGen/MSP430/indirectbr2.ll b/llvm/test/CodeGen/MSP430/indirectbr2.ll
index b0b4f1cbfd2..55598b97d91 100644
--- a/llvm/test/CodeGen/MSP430/indirectbr2.ll
+++ b/llvm/test/CodeGen/MSP430/indirectbr2.ll
@@ -5,7 +5,7 @@ define internal i16 @foo(i16 %i) nounwind {
entry:
%tmp1 = getelementptr inbounds [5 x i8*], [5 x i8*]* @C.0.2070, i16 0, i16 %i ; <i8**> [#uses=1]
%gotovar.4.0 = load i8*, i8** %tmp1, align 4 ; <i8*> [#uses=1]
-; CHECK: br .LC.0.2070(r12)
+; CHECK: br .LC.0.2070(r15)
indirectbr i8* %gotovar.4.0, [label %L5, label %L4, label %L3, label %L2, label %L1]
L5: ; preds = %bb2
diff --git a/llvm/test/CodeGen/PowerPC/indirectbr.ll b/llvm/test/CodeGen/PowerPC/indirectbr.ll
index d1e03ca7773..c040d7859a8 100644
--- a/llvm/test/CodeGen/PowerPC/indirectbr.ll
+++ b/llvm/test/CodeGen/PowerPC/indirectbr.ll
@@ -17,23 +17,35 @@ entry:
bb2: ; preds = %entry, %bb3
%gotovar.4.0 = phi i8* [ %gotovar.4.0.pre, %bb3 ], [ %0, %entry ] ; <i8*> [#uses=1]
; PIC: mtctr
-; PIC-NEXT: li
-; PIC-NEXT: li
-; PIC-NEXT: li
-; PIC-NEXT: li
; PIC-NEXT: bctr
+; PIC: li
+; PIC: b LBB
+; PIC: li
+; PIC: b LBB
+; PIC: li
+; PIC: b LBB
+; PIC: li
+; PIC: b LBB
; STATIC: mtctr
-; STATIC-NEXT: li
-; STATIC-NEXT: li
-; STATIC-NEXT: li
-; STATIC-NEXT: li
; STATIC-NEXT: bctr
+; STATIC: li
+; STATIC: b LBB
+; STATIC: li
+; STATIC: b LBB
+; STATIC: li
+; STATIC: b LBB
+; STATIC: li
+; STATIC: b LBB
; PPC64: mtctr
-; PPC64-NEXT: li
-; PPC64-NEXT: li
-; PPC64-NEXT: li
-; PPC64-NEXT: li
; PPC64-NEXT: bctr
+; PPC64: li
+; PPC64: b LBB
+; PPC64: li
+; PPC64: b LBB
+; PPC64: li
+; PPC64: b LBB
+; PPC64: li
+; PPC64: b LBB
indirectbr i8* %gotovar.4.0, [label %L5, label %L4, label %L3, label %L2, label %L1]
bb3: ; preds = %entry
OpenPOWER on IntegriCloud