Re-implement the main strength-reduction portion of LoopStrengthReduction.

This new version is much more aggressive about doing "full" reduction in cases where it reduces register pressure, and also more aggressive about rewriting induction variables to count down (or up) to zero when doing so reduces register pressure. It currently uses fairly simplistic algorithms for finding reuse opportunities, but it introduces a new framework that allows it to combine multiple strategies at once to form hybrid solutions, instead of doing all full-reduction or all base+index. llvm-svn: 94061
author: Dan Gohman <gohman@apple.com> 2010-01-21 02:09:26 +0000
committer: Dan Gohman <gohman@apple.com> 2010-01-21 02:09:26 +0000
commit: 51ad99d2c514a8923a2519e37b43f774d95ef9a5 (patch)
tree: 4dedf54ab5c92f08d11a647e31a5f249f3abf4fc /llvm/test/Transforms
parent: 626aba43d0960b8a99c1dd1680c866cc3f13bbf9 (diff)
download: bcm5719-llvm-51ad99d2c514a8923a2519e37b43f774d95ef9a5.tar.gz
bcm5719-llvm-51ad99d2c514a8923a2519e37b43f774d95ef9a5.zip
6 files changed, 13 insertions, 41 deletions
diff --git a/llvm/test/Transforms/IndVarSimplify/gep-with-mul-base.ll b/llvm/test/Transforms/IndVarSimplify/gep-with-mul-base.ll
index 78095940763..19d54ff2a22 100644
--- a/llvm/test/Transforms/IndVarSimplify/gep-with-mul-base.ll
+++ b/llvm/test/Transforms/IndVarSimplify/gep-with-mul-base.ll
@@ -1,6 +1,7 @@
 ; RUN: opt < %s -indvars -S > %t
-; RUN: grep add %t | count 8
-; RUN: grep mul %t | count 7
+; RUN: grep add %t | count 6
+; RUN: grep sub %t | count 2
+; RUN: grep mul %t | count 6
 
 define void @foo(i64 %n, i64 %m, i64 %o, double* nocapture %p) nounwind {
 entry:
diff --git a/llvm/test/Transforms/LoopStrengthReduce/2008-08-06-CmpStride.ll b/llvm/test/Transforms/LoopStrengthReduce/2008-08-06-CmpStride.ll
index 7c7a21c013f..99cb8569b3f 100644
--- a/llvm/test/Transforms/LoopStrengthReduce/2008-08-06-CmpStride.ll
+++ b/llvm/test/Transforms/LoopStrengthReduce/2008-08-06-CmpStride.ll
@@ -1,5 +1,4 @@
-; RUN: opt < %s -loop-reduce -S | grep ugt
-; PR2535
+; RUN: llc -march=x86-64 < %s -o - | grep {cmpl	\\$\[1\], %}
 
 @.str = internal constant [4 x i8] c"%d\0A\00"
 
@@ -16,7 +15,7 @@ forbody:
         %add166 = or i32 %mul15, 1              ; <i32> [#uses=1] *
         call i32 (i8*, ...)* @printf( i8* noalias  getelementptr ([4 x i8]* @.str, i32 0, i32 0), i32 %add166 ) nounwind
         %inc = add i32 %i.0, 1          ; <i32> [#uses=3]
-        %cmp = icmp ult i32 %inc, 1027          ; <i1> [#uses=1]
+        %cmp = icmp ne i32 %inc, 1027          ; <i1> [#uses=1]
         br i1 %cmp, label %forbody, label %afterfor
 
 afterfor:               ; preds = %forcond
diff --git a/llvm/test/Transforms/LoopStrengthReduce/change-compare-stride-trickiness-0.ll b/llvm/test/Transforms/LoopStrengthReduce/change-compare-stride-trickiness-0.ll
index 36941ad6d36..d9abc8ba665 100644
--- a/llvm/test/Transforms/LoopStrengthReduce/change-compare-stride-trickiness-0.ll
+++ b/llvm/test/Transforms/LoopStrengthReduce/change-compare-stride-trickiness-0.ll
@@ -1,10 +1,9 @@
-; RUN: llc %s -o - --x86-asm-syntax=att | grep {cmpl	\$4}
+; RUN: llc < %s -o - | grep {testl	%ecx, %ecx}
 target datalayout = "e-p:64:64:64-i1:8:8-i8:8:8-i16:16:16-i32:32:32-i64:64:64-f32:32:32-f64:64:64-v64:64:64-v128:128:128-a0:0:64-s0:64:64-f80:128:128"
 target triple = "x86_64-apple-darwin9"
 
-; This is like change-compare-stride-trickiness-1.ll except the comparison
-; happens before the relevant use, so the comparison stride can't be
-; easily changed.
+; The comparison happens before the relevant use, but it can still be rewritten
+; to compare with zero.
 
 define void @foo() nounwind {
 entry:
diff --git a/llvm/test/Transforms/LoopStrengthReduce/change-compare-stride-trickiness-1.ll b/llvm/test/Transforms/LoopStrengthReduce/change-compare-stride-trickiness-1.ll
index 8a3978bb2ee..ea8a259ecd8 100644
--- a/llvm/test/Transforms/LoopStrengthReduce/change-compare-stride-trickiness-1.ll
+++ b/llvm/test/Transforms/LoopStrengthReduce/change-compare-stride-trickiness-1.ll
@@ -1,10 +1,10 @@
-; RUN: llc %s -o - --x86-asm-syntax=att | grep {cmpq	\$8}
+; RUN: llc %s -o - --x86-asm-syntax=att | grep {cmp.	\$8}
 target datalayout = "e-p:64:64:64-i1:8:8-i8:8:8-i16:16:16-i32:32:32-i64:64:64-f32:32:32-f64:64:64-v64:64:64-v128:128:128-a0:0:64-s0:64:64-f80:128:128"
 target triple = "x86_64-apple-darwin9"
 
-; This is like change-compare-stride-trickiness-0.ll except the comparison
-; happens after the relevant use, so the comparison stride can be
-; easily changed.
+; The comparison happens after the relevant use, so the stride can easily
+; be changed. The comparison can be done in a narrower mode than the
+; induction variable.
 
 define void @foo() nounwind {
 entry:
diff --git a/llvm/test/Transforms/LoopStrengthReduce/count-to-zero.ll b/llvm/test/Transforms/LoopStrengthReduce/count-to-zero.ll
index 8cc3b5c1034..feb79f8a0c7 100644
--- a/llvm/test/Transforms/LoopStrengthReduce/count-to-zero.ll
+++ b/llvm/test/Transforms/LoopStrengthReduce/count-to-zero.ll
@@ -19,7 +19,7 @@ bb3:                                              ; preds = %bb1
   %tmp4 = add i32 %c_addr.1, -1                   ; <i32> [#uses=1]
   %c_addr.1.be = select i1 %tmp2, i32 %tmp3, i32 %tmp4 ; <i32> [#uses=1]
   %indvar.next = add i32 %indvar, 1               ; <i32> [#uses=1]
-; CHECK: sub i32 %lsr.iv, 1
+; CHECK: add i32 %lsr.iv, -1
   br label %bb6
 
 bb6:                                              ; preds = %bb3, %entry
diff --git a/llvm/test/Transforms/LoopStrengthReduce/icmp_use_postinc.ll b/llvm/test/Transforms/LoopStrengthReduce/icmp_use_postinc.ll
deleted file mode 100644
index 4ad5d1478d6..00000000000
--- a/llvm/test/Transforms/LoopStrengthReduce/icmp_use_postinc.ll
+++ /dev/null
@@ -1,27 +0,0 @@
-; RUN: opt < %s -loop-reduce -S | FileCheck %s
-
-define i32 @main(i32 %argc, i8** nocapture %argv) nounwind ssp {
-entry:
-  br i1 undef, label %bb4.preheader, label %bb.nph8
-
-bb4.preheader:                                    ; preds = %entry
-  br label %bb4
-
-bb1:                                              ; preds = %bb4
-  br i1 undef, label %bb.nph8, label %bb3
-
-bb3:                                              ; preds = %bb1
-  %phitmp = add i32 %indvar, 1                    ; <i32> [#uses=1]
-  br label %bb4
-
-bb4:                                              ; preds = %bb3, %bb4.preheader
-; CHECK: %lsr.iv = phi
-; CHECK: %lsr.iv.next = add i32 %lsr.iv, 1
-; CHECK: %0 = icmp slt i32 %lsr.iv.next, %argc
-  %indvar = phi i32 [ 1, %bb4.preheader ], [ %phitmp, %bb3 ] ; <i32> [#uses=2]
-  %0 = icmp slt i32 %indvar, %argc                ; <i1> [#uses=1]
-  br i1 %0, label %bb1, label %bb.nph8
-
-bb.nph8:                                          ; preds = %bb4, %bb1, %entry
-  unreachable
-}
author	Dan Gohman <gohman@apple.com>	2010-01-21 02:09:26 +0000
committer	Dan Gohman <gohman@apple.com>	2010-01-21 02:09:26 +0000
commit	51ad99d2c514a8923a2519e37b43f774d95ef9a5 (patch)
tree	4dedf54ab5c92f08d11a647e31a5f249f3abf4fc /llvm/test/Transforms
parent	626aba43d0960b8a99c1dd1680c866cc3f13bbf9 (diff)
download	bcm5719-llvm-51ad99d2c514a8923a2519e37b43f774d95ef9a5.tar.gz bcm5719-llvm-51ad99d2c514a8923a2519e37b43f774d95ef9a5.zip