3 files changed, 63 insertions, 3 deletions
diff --git a/llvm/test/CodeGen/X86/regalloc-reconcile-broken-hints.ll b/llvm/test/CodeGen/X86/regalloc-reconcile-broken-hints.ll
index ba8ff1bc181..3bb14c4b1cd 100644
--- a/llvm/test/CodeGen/X86/regalloc-reconcile-broken-hints.ll
+++ b/llvm/test/CodeGen/X86/regalloc-reconcile-broken-hints.ll
@@ -1,4 +1,4 @@
-; RUN: llc < %s -o - -mtriple=x86_64-apple-macosx | FileCheck %s
+; RUN: llc -lsr-filter-same-scaled-reg=false < %s -o - -mtriple=x86_64-apple-macosx | FileCheck %s
 ; Test case for the recoloring of broken hints.
 ; This is tricky to have something reasonably small to kick this optimization since
 ; it requires that spliting and spilling occur.
diff --git a/llvm/test/Transforms/LoopStrengthReduce/2013-01-14-ReuseCast.ll b/llvm/test/Transforms/LoopStrengthReduce/2013-01-14-ReuseCast.ll
index dcd068191e1..ea3f6077231 100644
--- a/llvm/test/Transforms/LoopStrengthReduce/2013-01-14-ReuseCast.ll
+++ b/llvm/test/Transforms/LoopStrengthReduce/2013-01-14-ReuseCast.ll
@@ -14,8 +14,8 @@ target datalayout = "e-p:64:64:64-i1:8:8-i8:8:8-i16:16:16-i32:32:32-i64:64:64-f3
 ; current LSR cost model.
 ; CHECK-NOT: = ptrtoint i8* undef to i64
 ; CHECK: .lr.ph
-; CHECK: [[TMP:%[^ ]+]] = add i64 %tmp5, 1
-; CHECK: sub i64 [[TMP]], %tmp6
+; CHECK: [[TMP:%[^ ]+]] = add i64 %tmp{{[0-9]+}}, -1
+; CHECK: sub i64 [[TMP]], %tmp{{[0-9]+}}
 ; CHECK: ret void
 define void @VerifyDiagnosticConsumerTest() unnamed_addr nounwind uwtable align 2 {
 bb:
diff --git a/llvm/test/Transforms/LoopStrengthReduce/X86/lsr-filtering-scaledreg.ll b/llvm/test/Transforms/LoopStrengthReduce/X86/lsr-filtering-scaledreg.ll
new file mode 100644
index 00000000000..4ce6f1a79fb
--- /dev/null
+++ b/llvm/test/Transforms/LoopStrengthReduce/X86/lsr-filtering-scaledreg.ll
@@ -0,0 +1,60 @@
+; RUN: opt < %s -loop-reduce -lsr-filter-same-scaled-reg=true -mtriple=x86_64-unknown-linux-gnu -S | FileCheck %s
+
+target datalayout = "e-m:e-i64:64-f80:128-n8:16:32:64-S128"
+
+%struct.ham = type { i8, i8, [5 x i32], i64, i64, i64 } ; fields 3 and 4 (both i64) are loaded in @foo's entry block
+
+@global = external local_unnamed_addr global %struct.ham, align 8 ; external declaration: no initializer in this file
+
+define void @foo() local_unnamed_addr {
+bb:
+  %tmp = load i64, i64* getelementptr inbounds (%struct.ham, %struct.ham* @global, i64 0, i32 3), align 8 ; field 3 of @global
+  %tmp1 = and i64 %tmp, 1792 ; keep only bits 8-10 (1792 == 0x700)
+  %tmp2 = load i64, i64* getelementptr inbounds (%struct.ham, %struct.ham* @global, i64 0, i32 4), align 8 ; field 4 of @global
+  %tmp3 = add i64 %tmp1, %tmp2
+  %tmp4 = load i8*, i8** null, align 8 ; pointer value loaded from the null address
+  %tmp5 = getelementptr inbounds i8, i8* %tmp4, i64 0 ; zero offset from %tmp4; only used by the compare in %bb24
+  %tmp6 = sub i64 0, %tmp3 ; %tmp6 = -%tmp3
+  %tmp7 = getelementptr inbounds i8, i8* %tmp4, i64 %tmp6 ; initial value of the %tmp10 phi
+  %tmp8 = inttoptr i64 0 to i8* ; initial value of the %tmp11 phi
+  br label %bb9
+
+; Without filtering out non-optimal formulae that share the same ScaledReg and Scale, the
+; strategy of narrowing the LSR search space by picking a winner reg generates only one
+; lsr.iv and a suboptimal result. The checks below expect %bb9 to begin with two i8* phis.
+; CHECK-LABEL: @foo(
+; CHECK: bb9:
+; CHECK-NEXT: = phi i8*
+; CHECK-NEXT: = phi i8*
+
+bb9:                                              ; preds = %bb12, %bb
+  %tmp10 = phi i8* [ %tmp7, %bb ], [ %tmp16, %bb12 ]
+  %tmp11 = phi i8* [ %tmp8, %bb ], [ %tmp17, %bb12 ]
+  br i1 false, label %bb18, label %bb12 ; constant-false condition: as written, this selects %bb12
+
+bb12:                                             ; preds = %bb9
+  %tmp13 = getelementptr inbounds i8, i8* %tmp10, i64 8
+  %tmp14 = bitcast i8* %tmp13 to i64*
+  %tmp15 = load i64, i64* %tmp14, align 1 ; i64 read at %tmp10 + 8; the loaded value has no users
+  %tmp16 = getelementptr inbounds i8, i8* %tmp10, i64 16 ; next %tmp10: advance by 16 bytes
+  %tmp17 = getelementptr inbounds i8, i8* %tmp11, i64 16 ; next %tmp11: advance by 16 bytes
+  br label %bb9
+
+bb18:                                             ; preds = %bb9
+  %tmp19 = icmp ugt i8* %tmp11, null
+  %tmp20 = getelementptr inbounds i8, i8* %tmp10, i64 8
+  %tmp21 = getelementptr inbounds i8, i8* %tmp11, i64 8
+  %tmp22 = select i1 %tmp19, i8* %tmp10, i8* %tmp20 ; initial load pointer for the %bb24 loop
+  %tmp23 = select i1 %tmp19, i8* %tmp11, i8* %tmp21 ; initial store pointer for the %bb24 loop
+  br label %bb24
+
+bb24:                                             ; preds = %bb24, %bb18
+  %tmp25 = phi i8* [ %tmp27, %bb24 ], [ %tmp22, %bb18 ]
+  %tmp26 = phi i8* [ %tmp29, %bb24 ], [ %tmp23, %bb18 ]
+  %tmp27 = getelementptr inbounds i8, i8* %tmp25, i64 1 ; load pointer + 1
+  %tmp28 = load i8, i8* %tmp25, align 1
+  %tmp29 = getelementptr inbounds i8, i8* %tmp26, i64 1 ; store pointer + 1
+  store i8 %tmp28, i8* %tmp26, align 1 ; copy one byte from %tmp25 to %tmp26
+  %tmp30 = icmp eq i8* %tmp29, %tmp5 ; comparison result has no users
+  br label %bb24 ; unconditional back-edge; this function contains no ret
+}