summary | refs | log | tree | commit | diff | stats
diff options
context:
space:
mode:
author: Dan Gohman <gohman@apple.com> 2010-02-19 00:05:23 +0000
committer: Dan Gohman <gohman@apple.com> 2010-02-19 00:05:23 +0000
commit: 2446f575031e4fcf6086979af6f9fd8d5467b621 (patch)
tree: aa34c8a9e538d5ea5b51a68ac14afd0713853829
parent: 6cb5ba4c4fadb1b66da04227e33f29b42885c61f (diff)
download: bcm5719-llvm-2446f575031e4fcf6086979af6f9fd8d5467b621.tar.gz
          bcm5719-llvm-2446f575031e4fcf6086979af6f9fd8d5467b621.zip
When determining the set of interesting reuse factors, consider
strides in foreign loops. This helps locate reuse opportunities
with existing induction variables in foreign loops and reduces
the need for inserting new ones. This fixes rdar://7657764.

llvm-svn: 96629
-rw-r--r--  llvm/lib/Transforms/Scalar/LoopStrengthReduce.cpp  24
-rw-r--r--  llvm/test/CodeGen/X86/lsr-reuse.ll                 58
2 files changed, 72 insertions, 10 deletions
diff --git a/llvm/lib/Transforms/Scalar/LoopStrengthReduce.cpp b/llvm/lib/Transforms/Scalar/LoopStrengthReduce.cpp
index b0b0934de30..0c2f1d63805 100644
--- a/llvm/lib/Transforms/Scalar/LoopStrengthReduce.cpp
+++ b/llvm/lib/Transforms/Scalar/LoopStrengthReduce.cpp
@@ -1690,21 +1690,29 @@ LSRInstance::getUse(const SCEV *&Expr,
void LSRInstance::CollectInterestingTypesAndFactors() {
SmallSetVector<const SCEV *, 4> Strides;
- // Collect interesting types and factors.
+ // Collect interesting types and strides.
for (IVUsers::const_iterator UI = IU.begin(), E = IU.end(); UI != E; ++UI) {
const SCEV *Stride = UI->getStride();
// Collect interesting types.
Types.insert(SE.getEffectiveSCEVType(Stride->getType()));
- // Collect interesting factors.
+ // Add the stride for this loop.
+ Strides.insert(Stride);
+
+ // Add strides for other mentioned loops.
+ for (const SCEVAddRecExpr *AR = dyn_cast<SCEVAddRecExpr>(UI->getOffset());
+ AR; AR = dyn_cast<SCEVAddRecExpr>(AR->getStart()))
+ Strides.insert(AR->getStepRecurrence(SE));
+ }
+
+ // Compute interesting factors from the set of interesting strides.
+ for (SmallSetVector<const SCEV *, 4>::const_iterator
+ I = Strides.begin(), E = Strides.end(); I != E; ++I)
for (SmallSetVector<const SCEV *, 4>::const_iterator NewStrideIter =
- Strides.begin(), SEnd = Strides.end(); NewStrideIter != SEnd;
- ++NewStrideIter) {
- const SCEV *OldStride = Stride;
+ next(I); NewStrideIter != E; ++NewStrideIter) {
+ const SCEV *OldStride = *I;
const SCEV *NewStride = *NewStrideIter;
- if (OldStride == NewStride)
- continue;
if (SE.getTypeSizeInBits(OldStride->getType()) !=
SE.getTypeSizeInBits(NewStride->getType())) {
@@ -1726,8 +1734,6 @@ void LSRInstance::CollectInterestingTypesAndFactors() {
Factors.insert(Factor->getValue()->getValue().getSExtValue());
}
}
- Strides.insert(Stride);
- }
// If all uses use the same type, don't bother looking for truncation-based
// reuse.
diff --git a/llvm/test/CodeGen/X86/lsr-reuse.ll b/llvm/test/CodeGen/X86/lsr-reuse.ll
index 7f2b8cc8f83..2f6fb3fa8be 100644
--- a/llvm/test/CodeGen/X86/lsr-reuse.ll
+++ b/llvm/test/CodeGen/X86/lsr-reuse.ll
@@ -1,4 +1,4 @@
-; RUN: llc < %s -march=x86-64 -O3 | FileCheck %s
+; RUN: llc < %s -march=x86-64 -O3 -asm-verbose=false | FileCheck %s
target datalayout = "e-p:64:64:64"
target triple = "x86_64-unknown-unknown"
@@ -384,3 +384,59 @@ loop:
return:
ret void
}
+
+; LSR should use only one indvar for the inner loop.
+; rdar://7657764
+
+; CHECK: asd:
+; CHECK: BB10_5:
+; CHECK-NEXT: addl (%r{{[^,]*}},%rdi,4), %e
+; CHECK-NEXT: incq %rdi
+; CHECK-NEXT: cmpq %rdi, %r{{[^,]*}}
+; CHECK-NEXT: jg
+
+%struct.anon = type { i32, [4200 x i32] }
+
+@bars = common global [123123 x %struct.anon] zeroinitializer, align 32 ; <[123123 x %struct.anon]*> [#uses=2]
+
+define i32 @asd(i32 %n) nounwind readonly {
+entry:
+ %0 = icmp sgt i32 %n, 0 ; <i1> [#uses=1]
+ br i1 %0, label %bb.nph14, label %bb5
+
+bb.nph14: ; preds = %entry
+ %tmp18 = zext i32 %n to i64 ; <i64> [#uses=1]
+ br label %bb
+
+bb: ; preds = %bb3, %bb.nph14
+ %indvar16 = phi i64 [ 0, %bb.nph14 ], [ %indvar.next17, %bb3 ] ; <i64> [#uses=3]
+ %s.113 = phi i32 [ 0, %bb.nph14 ], [ %s.0.lcssa, %bb3 ] ; <i32> [#uses=2]
+ %scevgep2526 = getelementptr [123123 x %struct.anon]* @bars, i64 0, i64 %indvar16, i32 0 ; <i32*> [#uses=1]
+ %1 = load i32* %scevgep2526, align 4 ; <i32> [#uses=2]
+ %2 = icmp sgt i32 %1, 0 ; <i1> [#uses=1]
+ br i1 %2, label %bb.nph, label %bb3
+
+bb.nph: ; preds = %bb
+ %tmp23 = sext i32 %1 to i64 ; <i64> [#uses=1]
+ br label %bb1
+
+bb1: ; preds = %bb.nph, %bb1
+ %indvar = phi i64 [ 0, %bb.nph ], [ %tmp19, %bb1 ] ; <i64> [#uses=2]
+ %s.07 = phi i32 [ %s.113, %bb.nph ], [ %4, %bb1 ] ; <i32> [#uses=1]
+ %c.08 = getelementptr [123123 x %struct.anon]* @bars, i64 0, i64 %indvar16, i32 1, i64 %indvar ; <i32*> [#uses=1]
+ %3 = load i32* %c.08, align 4 ; <i32> [#uses=1]
+ %4 = add nsw i32 %3, %s.07 ; <i32> [#uses=2]
+ %tmp19 = add i64 %indvar, 1 ; <i64> [#uses=2]
+ %5 = icmp sgt i64 %tmp23, %tmp19 ; <i1> [#uses=1]
+ br i1 %5, label %bb1, label %bb3
+
+bb3: ; preds = %bb1, %bb
+ %s.0.lcssa = phi i32 [ %s.113, %bb ], [ %4, %bb1 ] ; <i32> [#uses=2]
+ %indvar.next17 = add i64 %indvar16, 1 ; <i64> [#uses=2]
+ %exitcond = icmp eq i64 %indvar.next17, %tmp18 ; <i1> [#uses=1]
+ br i1 %exitcond, label %bb5, label %bb
+
+bb5: ; preds = %bb3, %entry
+ %s.1.lcssa = phi i32 [ 0, %entry ], [ %s.0.lcssa, %bb3 ] ; <i32> [#uses=1]
+ ret i32 %s.1.lcssa
+}
OpenPOWER on IntegriCloud