diff options
field | value | date |
---|---|---|
author | Dan Gohman <gohman@apple.com> | 2010-01-22 00:46:49 +0000 |
committer | Dan Gohman <gohman@apple.com> | 2010-01-22 00:46:49 +0000 |
commit | 045f81981a61d283af9d6df1050cef6e7c7ff629 (patch) | |
tree | 7ff3408b837589b02774ce51d2459763a668ad58 /llvm/test/CodeGen | |
parent | f7b2fb51d191cb3fa668135a67c6b702c67d239e (diff) | |
download | bcm5719-llvm-045f81981a61d283af9d6df1050cef6e7c7ff629.tar.gz bcm5719-llvm-045f81981a61d283af9d6df1050cef6e7c7ff629.zip |
Revert LoopStrengthReduce.cpp to pre-r94061 for now.
llvm-svn: 94123
Diffstat (limited to 'llvm/test/CodeGen')
-rw-r--r-- | llvm/test/CodeGen/ARM/arm-negative-stride.ll | 26 | ||||
-rw-r--r-- | llvm/test/CodeGen/ARM/lsr-code-insertion.ll | 4 | ||||
-rw-r--r-- | llvm/test/CodeGen/ARM/remat.ll | 3 | ||||
-rw-r--r-- | llvm/test/CodeGen/Thumb2/lsr-deficiency.ll | 18 | ||||
-rw-r--r-- | llvm/test/CodeGen/Thumb2/thumb2-ifcvt1.ll | 12 | ||||
-rw-r--r-- | llvm/test/CodeGen/X86/2006-05-11-InstrSched.ll | 4 | ||||
-rw-r--r-- | llvm/test/CodeGen/X86/2007-11-30-LoadFolding-Bug.ll | 2 | ||||
-rw-r--r-- | llvm/test/CodeGen/X86/iv-users-in-other-loops.ll | 8 | ||||
-rw-r--r-- | llvm/test/CodeGen/X86/loop-strength-reduce4.ll | 18 | ||||
-rw-r--r-- | llvm/test/CodeGen/X86/loop-strength-reduce8.ll | 8 | ||||
-rw-r--r-- | llvm/test/CodeGen/X86/lsr-reuse.ll | 159 | ||||
-rw-r--r-- | llvm/test/CodeGen/X86/masked-iv-safe.ll | 7 |
12 files changed, 29 insertions, 240 deletions
diff --git a/llvm/test/CodeGen/ARM/arm-negative-stride.ll b/llvm/test/CodeGen/ARM/arm-negative-stride.ll index 52ab8717c15..72ec8efcc44 100644 --- a/llvm/test/CodeGen/ARM/arm-negative-stride.ll +++ b/llvm/test/CodeGen/ARM/arm-negative-stride.ll @@ -1,32 +1,7 @@ ; RUN: llc < %s -march=arm | FileCheck %s -; This loop is rewritten with an indvar which counts down, which -; frees up a register from holding the trip count. - define void @test(i32* %P, i32 %A, i32 %i) nounwind { entry: -; CHECK: str r1, [{{r.*}}, +{{r.*}}, lsl #2] - icmp eq i32 %i, 0 ; <i1>:0 [#uses=1] - br i1 %0, label %return, label %bb - -bb: ; preds = %bb, %entry - %indvar = phi i32 [ 0, %entry ], [ %indvar.next, %bb ] ; <i32> [#uses=2] - %i_addr.09.0 = sub i32 %i, %indvar ; <i32> [#uses=1] - %tmp2 = getelementptr i32* %P, i32 %i_addr.09.0 ; <i32*> [#uses=1] - store i32 %A, i32* %tmp2 - %indvar.next = add i32 %indvar, 1 ; <i32> [#uses=2] - icmp eq i32 %indvar.next, %i ; <i1>:1 [#uses=1] - br i1 %1, label %return, label %bb - -return: ; preds = %bb, %entry - ret void -} - -; This loop has a non-address use of the count-up indvar, so -; it'll remain. Now the original store uses a negative-stride address. 
- -define void @test_with_forced_iv(i32* %P, i32 %A, i32 %i) nounwind { -entry: ; CHECK: str r1, [{{r.*}}, -{{r.*}}, lsl #2] icmp eq i32 %i, 0 ; <i1>:0 [#uses=1] br i1 %0, label %return, label %bb @@ -36,7 +11,6 @@ bb: ; preds = %bb, %entry %i_addr.09.0 = sub i32 %i, %indvar ; <i32> [#uses=1] %tmp2 = getelementptr i32* %P, i32 %i_addr.09.0 ; <i32*> [#uses=1] store i32 %A, i32* %tmp2 - store i32 %indvar, i32* null %indvar.next = add i32 %indvar, 1 ; <i32> [#uses=2] icmp eq i32 %indvar.next, %i ; <i1>:1 [#uses=1] br i1 %1, label %return, label %bb diff --git a/llvm/test/CodeGen/ARM/lsr-code-insertion.ll b/llvm/test/CodeGen/ARM/lsr-code-insertion.ll index 1bbb96deeef..507ec2c7bd3 100644 --- a/llvm/test/CodeGen/ARM/lsr-code-insertion.ll +++ b/llvm/test/CodeGen/ARM/lsr-code-insertion.ll @@ -1,5 +1,5 @@ -; RUN: llc < %s -stats |& grep {39.*Number of machine instrs printed} -; RUN: llc < %s -stats |& not grep {.*Number of re-materialization} +; RUN: llc < %s -stats |& grep {40.*Number of machine instrs printed} +; RUN: llc < %s -stats |& grep {.*Number of re-materialization} ; This test really wants to check that the resultant "cond_true" block only ; has a single store in it, and that cond_true55 only has code to materialize ; the constant and do a store. 
We do *not* want something like this: diff --git a/llvm/test/CodeGen/ARM/remat.ll b/llvm/test/CodeGen/ARM/remat.ll index 9072bcb762d..9565c8bca6b 100644 --- a/llvm/test/CodeGen/ARM/remat.ll +++ b/llvm/test/CodeGen/ARM/remat.ll @@ -1,4 +1,5 @@ -; RUN: llc < %s -mtriple=arm-apple-darwin -stats -info-output-file - | not grep "Number of re-materialization" +; RUN: llc < %s -mtriple=arm-apple-darwin +; RUN: llc < %s -mtriple=arm-apple-darwin -stats -info-output-file - | grep "Number of re-materialization" | grep 3 %struct.CONTENTBOX = type { i32, i32, i32, i32, i32 } %struct.LOCBOX = type { i32, i32, i32, i32 } diff --git a/llvm/test/CodeGen/Thumb2/lsr-deficiency.ll b/llvm/test/CodeGen/Thumb2/lsr-deficiency.ll index ac2cd34e4b3..7b1b57a786e 100644 --- a/llvm/test/CodeGen/Thumb2/lsr-deficiency.ll +++ b/llvm/test/CodeGen/Thumb2/lsr-deficiency.ll @@ -1,29 +1,25 @@ ; RUN: llc < %s -mtriple=thumbv7-apple-darwin10 -relocation-model=pic | FileCheck %s ; rdar://7387640 -; This now reduces to a single induction variable. - -; TODO: It still gets a GPR shuffle at the end of the loop -; This is because something in instruction selection has decided -; that comparing the pre-incremented value with zero is better -; than comparing the post-incremented value with -4. +; FIXME: We still need to rewrite array reference iv of stride -4 with loop +; count iv of stride -1. 
@G = external global i32 ; <i32*> [#uses=2] @array = external global i32* ; <i32**> [#uses=1] define arm_apcscc void @t() nounwind optsize { ; CHECK: t: -; CHECK: mov.w r2, #1000 +; CHECK: mov.w r2, #4000 +; CHECK: movw r3, #1001 entry: %.pre = load i32* @G, align 4 ; <i32> [#uses=1] br label %bb bb: ; preds = %bb, %entry ; CHECK: LBB1_1: -; CHECK: cmp r2, #0 -; CHECK: sub.w r9, r2, #1 -; CHECK: mov r2, r9 - +; CHECK: subs r3, #1 +; CHECK: cmp r3, #0 +; CHECK: sub.w r2, r2, #4 %0 = phi i32 [ %.pre, %entry ], [ %3, %bb ] ; <i32> [#uses=1] %indvar = phi i32 [ 0, %entry ], [ %indvar.next, %bb ] ; <i32> [#uses=2] %tmp5 = sub i32 1000, %indvar ; <i32> [#uses=1] diff --git a/llvm/test/CodeGen/Thumb2/thumb2-ifcvt1.ll b/llvm/test/CodeGen/Thumb2/thumb2-ifcvt1.ll index 1d267565e06..71199abc572 100644 --- a/llvm/test/CodeGen/Thumb2/thumb2-ifcvt1.ll +++ b/llvm/test/CodeGen/Thumb2/thumb2-ifcvt1.ll @@ -1,6 +1,6 @@ ; RUN: llc < %s -mtriple=thumbv7-apple-darwin | FileCheck %s -define i32 @t1(i32 %a, i32 %b, i32 %c, i32 %d) nounwind { +define i32 @t1(i32 %a, i32 %b, i32 %c, i32 %d) { ; CHECK: t1: ; CHECK: it ne ; CHECK: cmpne @@ -20,12 +20,12 @@ cond_next: } ; FIXME: Check for # of unconditional branch after adding branch folding post ifcvt. 
-define i32 @t2(i32 %a, i32 %b) nounwind { +define i32 @t2(i32 %a, i32 %b) { entry: ; CHECK: t2: -; CHECK: ite gt -; CHECK: subgt +; CHECK: ite le ; CHECK: suble +; CHECK: subgt %tmp1434 = icmp eq i32 %a, %b ; <i1> [#uses=1] br i1 %tmp1434, label %bb17, label %bb.outer @@ -60,14 +60,14 @@ bb17: ; preds = %cond_false, %cond_true, %entry @x = external global i32* ; <i32**> [#uses=1] -define void @foo(i32 %a) nounwind { +define void @foo(i32 %a) { entry: %tmp = load i32** @x ; <i32*> [#uses=1] store i32 %a, i32* %tmp ret void } -define void @t3(i32 %a, i32 %b) nounwind { +define void @t3(i32 %a, i32 %b) { entry: ; CHECK: t3: ; CHECK: it lt diff --git a/llvm/test/CodeGen/X86/2006-05-11-InstrSched.ll b/llvm/test/CodeGen/X86/2006-05-11-InstrSched.ll index 56d6aa960e2..bdbe713a295 100644 --- a/llvm/test/CodeGen/X86/2006-05-11-InstrSched.ll +++ b/llvm/test/CodeGen/X86/2006-05-11-InstrSched.ll @@ -1,5 +1,5 @@ ; RUN: llc < %s -march=x86 -mattr=+sse2 -stats -realign-stack=0 |&\ -; RUN: grep {asm-printer} | grep 34 +; RUN: grep {asm-printer} | grep 31 target datalayout = "e-p:32:32" define void @foo(i32* %mc, i32* %bp, i32* %ms, i32* %xmb, i32* %mpp, i32* %tpmm, i32* %ip, i32* %tpim, i32* %dpp, i32* %tpdm, i32* %bpi, i32 %M) nounwind { @@ -40,7 +40,7 @@ cond_true: ; preds = %cond_true, %entry %tmp137.upgrd.7 = bitcast i32* %tmp137 to <2 x i64>* ; <<2 x i64>*> [#uses=1] store <2 x i64> %tmp131, <2 x i64>* %tmp137.upgrd.7 %tmp147 = add nsw i32 %tmp.10, 8 ; <i32> [#uses=1] - %tmp.upgrd.8 = icmp ne i32 %tmp147, %M ; <i1> [#uses=1] + %tmp.upgrd.8 = icmp slt i32 %tmp147, %M ; <i1> [#uses=1] %indvar.next = add i32 %indvar, 1 ; <i32> [#uses=1] br i1 %tmp.upgrd.8, label %cond_true, label %return diff --git a/llvm/test/CodeGen/X86/2007-11-30-LoadFolding-Bug.ll b/llvm/test/CodeGen/X86/2007-11-30-LoadFolding-Bug.ll index 8e315f4d80f..721d4c945b1 100644 --- a/llvm/test/CodeGen/X86/2007-11-30-LoadFolding-Bug.ll +++ b/llvm/test/CodeGen/X86/2007-11-30-LoadFolding-Bug.ll @@ -35,7 +35,7 @@ 
cond_next36.i: ; preds = %cond_next.i bb.i28.i: ; preds = %bb.i28.i, %cond_next36.i ; CHECK: %bb.i28.i ; CHECK: addl $2 -; CHECK: addl $-2 +; CHECK: addl $2 %j.0.reg2mem.0.i16.i = phi i32 [ 0, %cond_next36.i ], [ %indvar.next39.i, %bb.i28.i ] ; <i32> [#uses=2] %din_addr.1.reg2mem.0.i17.i = phi double [ 0.000000e+00, %cond_next36.i ], [ %tmp16.i25.i, %bb.i28.i ] ; <double> [#uses=1] %tmp1.i18.i = fptosi double %din_addr.1.reg2mem.0.i17.i to i32 ; <i32> [#uses=2] diff --git a/llvm/test/CodeGen/X86/iv-users-in-other-loops.ll b/llvm/test/CodeGen/X86/iv-users-in-other-loops.ll index 0410bc0d9a9..c695c29e068 100644 --- a/llvm/test/CodeGen/X86/iv-users-in-other-loops.ll +++ b/llvm/test/CodeGen/X86/iv-users-in-other-loops.ll @@ -1,11 +1,11 @@ ; RUN: llc < %s -march=x86-64 -o %t -; RUN: not grep inc %t +; RUN: grep inc %t | count 1 ; RUN: grep dec %t | count 2 -; RUN: grep addq %t | count 10 +; RUN: grep addq %t | count 13 ; RUN: not grep addb %t ; RUN: grep leaq %t | count 9 -; RUN: grep leal %t | count 2 -; RUN: grep movq %t | count 10 +; RUN: grep leal %t | count 3 +; RUN: grep movq %t | count 5 ; IV users in each of the loops from other loops shouldn't cause LSR ; to insert new induction variables. Previously it would create a diff --git a/llvm/test/CodeGen/X86/loop-strength-reduce4.ll b/llvm/test/CodeGen/X86/loop-strength-reduce4.ll index 6c0eb8c0df9..07e46eca75e 100644 --- a/llvm/test/CodeGen/X86/loop-strength-reduce4.ll +++ b/llvm/test/CodeGen/X86/loop-strength-reduce4.ll @@ -1,19 +1,5 @@ -; RUN: llc < %s -march=x86 -relocation-model=static -mtriple=i686-apple-darwin | FileCheck %s -check-prefix=STATIC -; RUN: llc < %s -march=x86 -relocation-model=pic | FileCheck %s -check-prefix=PIC - -; By starting the IV at -64 instead of 0, a cmp is eliminated, -; as the flags from the add can be used directly. 
- -; STATIC: movl $-64, %ecx - -; STATIC: movl %eax, _state+76(%ecx) -; STATIC: addl $16, %ecx -; STATIC: jne - -; In PIC mode the symbol can't be folded, so the change-compare-stride -; trick applies. - -; PIC: cmpl $64 +; RUN: llc < %s -march=x86 | grep cmp | grep 64 +; RUN: llc < %s -march=x86 | not grep inc @state = external global [0 x i32] ; <[0 x i32]*> [#uses=4] @S = external global [0 x i32] ; <[0 x i32]*> [#uses=4] diff --git a/llvm/test/CodeGen/X86/loop-strength-reduce8.ll b/llvm/test/CodeGen/X86/loop-strength-reduce8.ll index 6b2247d1d61..e14cd8a99e3 100644 --- a/llvm/test/CodeGen/X86/loop-strength-reduce8.ll +++ b/llvm/test/CodeGen/X86/loop-strength-reduce8.ll @@ -1,10 +1,4 @@ -; RUN: llc < %s -mtriple=i386-apple-darwin | FileCheck %s - -; CHECK: leal 16(%eax), %edx -; CHECK: align -; CHECK: addl $4, %edx -; CHECK: decl %ecx -; CHECK: jne LBB1_2 +; RUN: llc < %s -mtriple=i386-apple-darwin | grep leal | not grep 16 %struct.CUMULATIVE_ARGS = type { i32, i32, i32, i32, i32, i32, i32 } %struct.bitmap_element = type { %struct.bitmap_element*, %struct.bitmap_element*, i32, [2 x i64] } diff --git a/llvm/test/CodeGen/X86/lsr-reuse.ll b/llvm/test/CodeGen/X86/lsr-reuse.ll deleted file mode 100644 index a1919bab38a..00000000000 --- a/llvm/test/CodeGen/X86/lsr-reuse.ll +++ /dev/null @@ -1,159 +0,0 @@ -; RUN: llc < %s -march=x86-64 | FileCheck %s -target datalayout = "e-p:64:64:64" -target triple = "x86_64-unknown-unknown" - -; Full strength reduction reduces register pressure from 5 to 4 here. 
- -; CHECK: full_me: -; CHECK: movsd (%rsi), %xmm0 -; CHECK: mulsd (%rdx), %xmm0 -; CHECK: movsd %xmm0, (%rdi) -; CHECK: addq $8, %rsi -; CHECK: addq $8, %rdx -; CHECK: addq $8, %rdi -; CHECK: decq %rcx -; CHECK: jne - -define void @full_me(double* nocapture %A, double* nocapture %B, double* nocapture %C, i64 %n) nounwind { -entry: - %t0 = icmp sgt i64 %n, 0 - br i1 %t0, label %loop, label %return - -loop: - %i = phi i64 [ %i.next, %loop ], [ 0, %entry ] - %Ai = getelementptr inbounds double* %A, i64 %i - %Bi = getelementptr inbounds double* %B, i64 %i - %Ci = getelementptr inbounds double* %C, i64 %i - %t1 = load double* %Bi - %t2 = load double* %Ci - %m = fmul double %t1, %t2 - store double %m, double* %Ai - %i.next = add nsw i64 %i, 1 - %exitcond = icmp eq i64 %i.next, %n - br i1 %exitcond, label %return, label %loop - -return: - ret void -} - -; In this test, the counting IV exit value is used, so full strength reduction -; would not reduce register pressure. IndVarSimplify ought to simplify such -; cases away, but it's useful here to verify that LSR's register pressure -; heuristics are working as expected. 
- -; CHECK: count_me_0: -; CHECK: movsd (%rsi,%rax,8), %xmm0 -; CHECK: mulsd (%rdx,%rax,8), %xmm0 -; CHECK: movsd %xmm0, (%rdi,%rax,8) -; CHECK: incq %rax -; CHECK: cmpq %rax, %rcx -; CHECK: jne - -define i64 @count_me_0(double* nocapture %A, double* nocapture %B, double* nocapture %C, i64 %n) nounwind { -entry: - %t0 = icmp sgt i64 %n, 0 - br i1 %t0, label %loop, label %return - -loop: - %i = phi i64 [ %i.next, %loop ], [ 0, %entry ] - %Ai = getelementptr inbounds double* %A, i64 %i - %Bi = getelementptr inbounds double* %B, i64 %i - %Ci = getelementptr inbounds double* %C, i64 %i - %t1 = load double* %Bi - %t2 = load double* %Ci - %m = fmul double %t1, %t2 - store double %m, double* %Ai - %i.next = add nsw i64 %i, 1 - %exitcond = icmp eq i64 %i.next, %n - br i1 %exitcond, label %return, label %loop - -return: - %q = phi i64 [ 0, %entry ], [ %i.next, %loop ] - ret i64 %q -} - -; In this test, the trip count value is used, so full strength reduction -; would not reduce register pressure. -; (though it would reduce register pressure inside the loop...) 
- -; CHECK: count_me_1: -; CHECK: movsd (%rsi,%rax,8), %xmm0 -; CHECK: mulsd (%rdx,%rax,8), %xmm0 -; CHECK: movsd %xmm0, (%rdi,%rax,8) -; CHECK: incq %rax -; CHECK: cmpq %rax, %rcx -; CHECK: jne - -define i64 @count_me_1(double* nocapture %A, double* nocapture %B, double* nocapture %C, i64 %n) nounwind { -entry: - %t0 = icmp sgt i64 %n, 0 - br i1 %t0, label %loop, label %return - -loop: - %i = phi i64 [ %i.next, %loop ], [ 0, %entry ] - %Ai = getelementptr inbounds double* %A, i64 %i - %Bi = getelementptr inbounds double* %B, i64 %i - %Ci = getelementptr inbounds double* %C, i64 %i - %t1 = load double* %Bi - %t2 = load double* %Ci - %m = fmul double %t1, %t2 - store double %m, double* %Ai - %i.next = add nsw i64 %i, 1 - %exitcond = icmp eq i64 %i.next, %n - br i1 %exitcond, label %return, label %loop - -return: - %q = phi i64 [ 0, %entry ], [ %n, %loop ] - ret i64 %q -} - -; This should be fully strength-reduced to reduce register pressure, however -; the current heuristics get distracted by all the reuse with the stride-1 -; induction variable first. - -; But even so, be clever and start the stride-1 variable at a non-zero value -; to eliminate an in-loop immediate value. 
- -; CHECK: count_me_2: -; CHECK: movl $5, %eax -; CHECK: align -; CHECK: BB4_1: -; CHECK: movsd (%rdi,%rax,8), %xmm0 -; CHECK: addsd (%rsi,%rax,8), %xmm0 -; CHECK: movsd %xmm0, (%rdx,%rax,8) -; CHECK: movsd 40(%rdi,%rax,8), %xmm0 -; CHECK: addsd 40(%rsi,%rax,8), %xmm0 -; CHECK: movsd %xmm0, 40(%rdx,%rax,8) -; CHECK: incq %rax -; CHECK: cmpq $5005, %rax -; CHECK: jne - -define void @count_me_2(double* nocapture %A, double* nocapture %B, double* nocapture %C) nounwind { -entry: - br label %loop - -loop: - %i = phi i64 [ 0, %entry ], [ %i.next, %loop ] - %i5 = add i64 %i, 5 - %Ai = getelementptr double* %A, i64 %i5 - %t2 = load double* %Ai - %Bi = getelementptr double* %B, i64 %i5 - %t4 = load double* %Bi - %t5 = fadd double %t2, %t4 - %Ci = getelementptr double* %C, i64 %i5 - store double %t5, double* %Ci - %i10 = add i64 %i, 10 - %Ai10 = getelementptr double* %A, i64 %i10 - %t9 = load double* %Ai10 - %Bi10 = getelementptr double* %B, i64 %i10 - %t11 = load double* %Bi10 - %t12 = fadd double %t9, %t11 - %Ci10 = getelementptr double* %C, i64 %i10 - store double %t12, double* %Ci10 - %i.next = add i64 %i, 1 - %exitcond = icmp eq i64 %i.next, 5000 - br i1 %exitcond, label %return, label %loop - -return: - ret void -} diff --git a/llvm/test/CodeGen/X86/masked-iv-safe.ll b/llvm/test/CodeGen/X86/masked-iv-safe.ll index 7111d687ed4..bc493bd8f72 100644 --- a/llvm/test/CodeGen/X86/masked-iv-safe.ll +++ b/llvm/test/CodeGen/X86/masked-iv-safe.ll @@ -4,9 +4,9 @@ ; RUN: not grep sar %t ; RUN: not grep shl %t ; RUN: grep add %t | count 2 -; RUN: grep inc %t | count 3 +; RUN: grep inc %t | count 4 ; RUN: grep dec %t | count 2 -; RUN: grep lea %t | count 3 +; RUN: grep lea %t | count 2 ; Optimize away zext-inreg and sext-inreg on the loop induction ; variable using trip-count information. 
@@ -127,9 +127,6 @@ return: ret void } -; TODO: If we could handle all the loads and stores as post-inc users, we could -; use {-1,+,1} in the induction variable register, and we'd get another inc, -; one fewer add, and a comparison with zero. define void @another_count_up(double* %d, i64 %n) nounwind { entry: br label %loop |