8 files changed, 456 insertions, 0 deletions
diff --git a/llvm/test/Transforms/LoopVectorize/PowerPC/agg-interleave-a2.ll b/llvm/test/Transforms/LoopVectorize/PowerPC/agg-interleave-a2.ll
new file mode 100644
index 00000000000..3491e08bbaa
--- /dev/null
+++ b/llvm/test/Transforms/LoopVectorize/PowerPC/agg-interleave-a2.ll
@@ -0,0 +1,40 @@
+; RUN: opt -S -basicaa -loop-vectorize < %s | FileCheck %s
+target datalayout = "E-m:e-i64:64-n32:64"
+target triple = "powerpc64-unknown-linux-gnu"
+
+; Function Attrs: nounwind
+define void @foo(double* noalias nocapture %a, double* noalias nocapture readonly %b, double* noalias nocapture readonly %c) #0 {
+entry:
+  br label %for.body
+; Scalar input loop: a[i] = b[i]*(b[i]*2) + c[i]*(c[i]*3) + (b[i]*4)*c[i] + 1 for i in [0, 1600).
+; CHECK-LABEL: @foo
+; CHECK: fmul <4 x double> %{{[^,]+}}, <double 2.000000e+00, double 2.000000e+00, double 2.000000e+00, double 2.000000e+00>
+; CHECK-NEXT: fmul <4 x double> %{{[^,]+}}, <double 2.000000e+00, double 2.000000e+00, double 2.000000e+00, double 2.000000e+00>
+; The directives above require two adjacent <4 x double> multiplies by a splat of 2.0 in the output.
+for.cond.cleanup:                                 ; preds = %for.body
+  ret void
+
+for.body:                                         ; preds = %for.body, %entry
+  %indvars.iv = phi i64 [ 0, %entry ], [ %indvars.iv.next, %for.body ] ; i
+  %arrayidx = getelementptr inbounds double, double* %b, i64 %indvars.iv
+  %0 = load double, double* %arrayidx, align 8 ; b[i]
+  %mul = fmul double %0, 2.000000e+00 ; b[i] * 2 (the multiply the directives look for)
+  %mul3 = fmul double %0, %mul ; b[i] * (b[i] * 2)
+  %arrayidx5 = getelementptr inbounds double, double* %c, i64 %indvars.iv
+  %1 = load double, double* %arrayidx5, align 8 ; c[i]
+  %mul6 = fmul double %1, 3.000000e+00
+  %mul9 = fmul double %1, %mul6 ; c[i] * (c[i] * 3)
+  %add = fadd double %mul3, %mul9
+  %mul12 = fmul double %0, 4.000000e+00
+  %mul15 = fmul double %mul12, %1 ; (b[i] * 4) * c[i]
+  %add16 = fadd double %mul15, %add
+  %add17 = fadd double %add16, 1.000000e+00
+  %arrayidx19 = getelementptr inbounds double, double* %a, i64 %indvars.iv
+  store double %add17, double* %arrayidx19, align 8 ; a[i] = sum of the three products + 1
+  %indvars.iv.next = add nuw nsw i64 %indvars.iv, 1
+  %exitcond = icmp eq i64 %indvars.iv.next, 1600 ; fixed trip count of 1600
+  br i1 %exitcond, label %for.cond.cleanup, label %for.body
+}
+
+attributes #0 = { nounwind "target-cpu"="a2q" }
+
diff --git a/llvm/test/Transforms/LoopVectorize/PowerPC/large-loop-rdx.ll b/llvm/test/Transforms/LoopVectorize/PowerPC/large-loop-rdx.ll
new file mode 100644
index 00000000000..c88b496e4e9
--- /dev/null
+++ b/llvm/test/Transforms/LoopVectorize/PowerPC/large-loop-rdx.ll
@@ -0,0 +1,75 @@
+; RUN: opt < %s -loop-vectorize -S | FileCheck %s
+
+; CHECK: fadd
+; CHECK-NEXT: fadd
+; CHECK-NEXT: fadd
+; CHECK-NEXT: fadd
+; CHECK-NEXT: fadd
+; CHECK-NEXT: fadd
+; CHECK-NEXT: fadd
+; CHECK-NEXT: fadd
+; CHECK-NEXT: fadd
+; CHECK-NEXT: fadd
+; CHECK-NEXT: fadd
+; CHECK-NEXT: fadd
+; CHECK-NEXT: =
+; CHECK-NOT: fadd
+; CHECK-SAME: >
+
+target datalayout = "e-m:e-i64:64-n32:64"
+target triple = "powerpc64le-ibm-linux-gnu"
+
+define void @QLA_F3_r_veq_norm2_V(float* noalias nocapture %r, [3 x { float, float }]* noalias nocapture readonly %a, i32 signext %n) {
+entry:
+  %cmp24 = icmp sgt i32 %n, 0
+  br i1 %cmp24, label %for.cond1.preheader.preheader, label %for.end13 ; n <= 0 stores 0.0 to *r
+; Loop: for each i in [0, n), add re*re + im*im of a[i][0], a[i][1], a[i][2] (widened to double) into sum.
+for.cond1.preheader.preheader:                    ; preds = %entry
+  br label %for.cond1.preheader
+
+for.cond1.preheader:                              ; preds = %for.cond1.preheader.preheader, %for.cond1.preheader
+  %indvars.iv = phi i64 [ %indvars.iv.next, %for.cond1.preheader ], [ 0, %for.cond1.preheader.preheader ] ; i
+  %sum.026 = phi double [ %add10.2, %for.cond1.preheader ], [ 0.000000e+00, %for.cond1.preheader.preheader ] ; running double-precision sum
+  %arrayidx5.realp = getelementptr inbounds [3 x { float, float }], [3 x { float, float }]* %a, i64 %indvars.iv, i64 0, i32 0
+  %arrayidx5.real = load float, float* %arrayidx5.realp, align 8 ; a[i][0], first field
+  %arrayidx5.imagp = getelementptr inbounds [3 x { float, float }], [3 x { float, float }]* %a, i64 %indvars.iv, i64 0, i32 1
+  %arrayidx5.imag = load float, float* %arrayidx5.imagp, align 8 ; a[i][0], second field
+  %mul = fmul fast float %arrayidx5.real, %arrayidx5.real
+  %mul9 = fmul fast float %arrayidx5.imag, %arrayidx5.imag
+  %add = fadd fast float %mul9, %mul
+  %conv = fpext float %add to double
+  %add10 = fadd fast double %conv, %sum.026 ; sum += contribution of element 0
+  %arrayidx5.realp.1 = getelementptr inbounds [3 x { float, float }], [3 x { float, float }]* %a, i64 %indvars.iv, i64 1, i32 0
+  %arrayidx5.real.1 = load float, float* %arrayidx5.realp.1, align 8
+  %arrayidx5.imagp.1 = getelementptr inbounds [3 x { float, float }], [3 x { float, float }]* %a, i64 %indvars.iv, i64 1, i32 1
+  %arrayidx5.imag.1 = load float, float* %arrayidx5.imagp.1, align 8
+  %mul.1 = fmul fast float %arrayidx5.real.1, %arrayidx5.real.1
+  %mul9.1 = fmul fast float %arrayidx5.imag.1, %arrayidx5.imag.1
+  %add.1 = fadd fast float %mul9.1, %mul.1
+  %conv.1 = fpext float %add.1 to double
+  %add10.1 = fadd fast double %conv.1, %add10 ; sum += contribution of element 1
+  %arrayidx5.realp.2 = getelementptr inbounds [3 x { float, float }], [3 x { float, float }]* %a, i64 %indvars.iv, i64 2, i32 0
+  %arrayidx5.real.2 = load float, float* %arrayidx5.realp.2, align 8
+  %arrayidx5.imagp.2 = getelementptr inbounds [3 x { float, float }], [3 x { float, float }]* %a, i64 %indvars.iv, i64 2, i32 1
+  %arrayidx5.imag.2 = load float, float* %arrayidx5.imagp.2, align 8
+  %mul.2 = fmul fast float %arrayidx5.real.2, %arrayidx5.real.2
+  %mul9.2 = fmul fast float %arrayidx5.imag.2, %arrayidx5.imag.2
+  %add.2 = fadd fast float %mul9.2, %mul.2
+  %conv.2 = fpext float %add.2 to double
+  %add10.2 = fadd fast double %conv.2, %add10.1 ; sum += contribution of element 2
+  %indvars.iv.next = add nuw nsw i64 %indvars.iv, 1
+  %lftr.wideiv = trunc i64 %indvars.iv.next to i32
+  %exitcond = icmp eq i32 %lftr.wideiv, %n ; leave once i + 1 == n
+  br i1 %exitcond, label %for.cond.for.end13_crit_edge, label %for.cond1.preheader
+
+for.cond.for.end13_crit_edge:                     ; preds = %for.cond1.preheader
+  %add10.2.lcssa = phi double [ %add10.2, %for.cond1.preheader ]
+  %phitmp = fptrunc double %add10.2.lcssa to float ; narrow the accumulated sum back to float
+  br label %for.end13
+
+for.end13:                                        ; preds = %for.cond.for.end13_crit_edge, %entry
+  %sum.0.lcssa = phi float [ %phitmp, %for.cond.for.end13_crit_edge ], [ 0.000000e+00, %entry ]
+  store float %sum.0.lcssa, float* %r, align 4 ; *r = result
+  ret void
+}
+
diff --git a/llvm/test/Transforms/LoopVectorize/PowerPC/lit.local.cfg b/llvm/test/Transforms/LoopVectorize/PowerPC/lit.local.cfg
new file mode 100644
index 00000000000..5d33887ff0a
--- /dev/null
+++ b/llvm/test/Transforms/LoopVectorize/PowerPC/lit.local.cfg
@@ -0,0 +1,3 @@
+if not 'PowerPC' in config.root.targets:
+    config.unsupported = True
+
diff --git a/llvm/test/Transforms/LoopVectorize/PowerPC/pr30990.ll b/llvm/test/Transforms/LoopVectorize/PowerPC/pr30990.ll
new file mode 100644
index 00000000000..d3cdabd26f5
--- /dev/null
+++ b/llvm/test/Transforms/LoopVectorize/PowerPC/pr30990.ll
@@ -0,0 +1,140 @@
+; RUN: opt < %s -loop-vectorize -mcpu=pwr8 -mattr=+vsx -force-vector-interleave=1 -vectorizer-maximize-bandwidth=0 -S | FileCheck %s
+
+target triple = "powerpc64-unknown-linux-gnu"
+
+define signext i32 @foo(i8* readonly %ptr, i32 signext %l) {
+entry: ; counts the i8 elements of [ptr, ptr + l) that are < -64 (signed); returns 0 when l <= 0
+  %idx.ext = sext i32 %l to i64
+  %add.ptr = getelementptr inbounds i8, i8* %ptr, i64 %idx.ext ; end = ptr + l
+  %cmp7 = icmp sgt i32 %l, 0
+  br i1 %cmp7, label %while.body.preheader, label %while.end
+
+while.body.preheader:                             ; preds = %entry
+  br label %while.body
+
+while.body:                                       ; preds = %while.body.preheader, %while.body
+  %count.09 = phi i32 [ %add, %while.body ], [ 0, %while.body.preheader ] ; i32 running count
+  %ptr.addr.08 = phi i8* [ %incdec.ptr, %while.body ], [ %ptr, %while.body.preheader ] ; cursor
+  %0 = load i8, i8* %ptr.addr.08, align 1
+  %cmp1 = icmp slt i8 %0, -64
+  %cond = zext i1 %cmp1 to i32 ; 1 when the element is < -64, else 0
+  %add = add nsw i32 %cond, %count.09
+  %incdec.ptr = getelementptr inbounds i8, i8* %ptr.addr.08, i64 1
+  %cmp = icmp ult i8* %incdec.ptr, %add.ptr ; continue while cursor + 1 < end
+  br i1 %cmp, label %while.body, label %while.end.loopexit
+
+while.end.loopexit:                               ; preds = %while.body
+  %add.lcssa = phi i32 [ %add, %while.body ]
+  br label %while.end
+
+while.end:                                        ; preds = %while.end.loopexit, %entry
+  %count.0.lcssa = phi i32 [ 0, %entry ], [ %add.lcssa, %while.end.loopexit ]
+  ret i32 %count.0.lcssa
+
+; CHECK-LABEL: @foo(
+; CHECK: load <4 x i8>
+; CHECK: icmp slt <4 x i8>
+}
+
+define signext i16 @foo2(i8* readonly %ptr, i32 signext %l) {
+entry: ; counts the i8 elements of [ptr, ptr + l) that are < -64 (signed) in an i16; returns 0 when l <= 0
+  %idx.ext = sext i32 %l to i64
+  %add.ptr = getelementptr inbounds i8, i8* %ptr, i64 %idx.ext ; end = ptr + l
+  %cmp7 = icmp sgt i32 %l, 0
+  br i1 %cmp7, label %while.body.preheader, label %while.end
+
+while.body.preheader:                             ; preds = %entry
+  br label %while.body
+
+while.body:                                       ; preds = %while.body.preheader, %while.body
+  %count.09 = phi i16 [ %add, %while.body ], [ 0, %while.body.preheader ] ; i16 running count
+  %ptr.addr.08 = phi i8* [ %incdec.ptr, %while.body ], [ %ptr, %while.body.preheader ] ; cursor
+  %0 = load i8, i8* %ptr.addr.08, align 1
+  %cmp1 = icmp slt i8 %0, -64
+  %cond = zext i1 %cmp1 to i16
+  %add = add nsw i16 %cond, %count.09
+  %incdec.ptr = getelementptr inbounds i8, i8* %ptr.addr.08, i64 1
+  %cmp = icmp ult i8* %incdec.ptr, %add.ptr ; continue while cursor + 1 < end
+  br i1 %cmp, label %while.body, label %while.end.loopexit
+
+while.end.loopexit:                               ; preds = %while.body
+  %add.lcssa = phi i16 [ %add, %while.body ]
+  br label %while.end
+
+while.end:                                        ; preds = %while.end.loopexit, %entry
+  %count.0.lcssa = phi i16 [ 0, %entry ], [ %add.lcssa, %while.end.loopexit ]
+  ret i16 %count.0.lcssa
+
+; CHECK-LABEL: foo2
+; CHECK: load <8 x i8>
+; CHECK: icmp slt <8 x i8>
+}
+
+define signext i32 @foo3(i16* readonly %ptr, i32 signext %l) {
+entry: ; counts the i16 elements of [ptr, ptr + l) that are < -64 (signed) in an i32; returns 0 when l <= 0
+  %idx.ext = sext i32 %l to i64
+  %add.ptr = getelementptr inbounds i16, i16* %ptr, i64 %idx.ext ; end = ptr + l (in i16 elements)
+  %cmp7 = icmp sgt i32 %l, 0
+  br i1 %cmp7, label %while.body.preheader, label %while.end
+
+while.body.preheader:                             ; preds = %entry
+  br label %while.body
+
+while.body:                                       ; preds = %while.body.preheader, %while.body
+  %count.09 = phi i32 [ %add, %while.body ], [ 0, %while.body.preheader ] ; i32 running count
+  %ptr.addr.16 = phi i16* [ %incdec.ptr, %while.body ], [ %ptr, %while.body.preheader ] ; cursor
+  %0 = load i16, i16* %ptr.addr.16, align 1
+  %cmp1 = icmp slt i16 %0, -64
+  %cond = zext i1 %cmp1 to i32
+  %add = add nsw i32 %cond, %count.09
+  %incdec.ptr = getelementptr inbounds i16, i16* %ptr.addr.16, i64 1
+  %cmp = icmp ult i16* %incdec.ptr, %add.ptr ; continue while cursor + 1 < end
+  br i1 %cmp, label %while.body, label %while.end.loopexit
+
+while.end.loopexit:                               ; preds = %while.body
+  %add.lcssa = phi i32 [ %add, %while.body ]
+  br label %while.end
+
+while.end:                                        ; preds = %while.end.loopexit, %entry
+  %count.0.lcssa = phi i32 [ 0, %entry ], [ %add.lcssa, %while.end.loopexit ]
+  ret i32 %count.0.lcssa
+
+; CHECK-LABEL: foo3
+; CHECK: load <4 x i16>
+; CHECK: icmp slt <4 x i16>
+}
+
+define i64 @foo4(i16* readonly %ptr, i32 signext %l) {
+entry: ; counts the i16 elements of [ptr, ptr + l) that are < -64 (signed) in an i64; returns 0 when l <= 0
+  %idx.ext = sext i32 %l to i64
+  %add.ptr = getelementptr inbounds i16, i16* %ptr, i64 %idx.ext ; end = ptr + l (in i16 elements)
+  %cmp7 = icmp sgt i32 %l, 0
+  br i1 %cmp7, label %while.body.preheader, label %while.end
+
+while.body.preheader:                             ; preds = %entry
+  br label %while.body
+
+while.body:                                       ; preds = %while.body.preheader, %while.body
+  %count.09 = phi i64 [ %add, %while.body ], [ 0, %while.body.preheader ] ; i64 running count
+  %ptr.addr.16 = phi i16* [ %incdec.ptr, %while.body ], [ %ptr, %while.body.preheader ] ; cursor
+  %0 = load i16, i16* %ptr.addr.16, align 1
+  %cmp1 = icmp slt i16 %0, -64
+  %cond = zext i1 %cmp1 to i64
+  %add = add nsw i64 %cond, %count.09
+  %incdec.ptr = getelementptr inbounds i16, i16* %ptr.addr.16, i64 1
+  %cmp = icmp ult i16* %incdec.ptr, %add.ptr ; continue while cursor + 1 < end
+  br i1 %cmp, label %while.body, label %while.end.loopexit
+
+while.end.loopexit:                               ; preds = %while.body
+  %add.lcssa = phi i64 [ %add, %while.body ]
+  br label %while.end
+
+while.end:                                        ; preds = %while.end.loopexit, %entry
+  %count.0.lcssa = phi i64 [ 0, %entry ], [ %add.lcssa, %while.end.loopexit ]
+  ret i64 %count.0.lcssa
+
+; CHECK-LABEL: foo4
+; CHECK: load <2 x i16>
+; CHECK: icmp slt <2 x i16>
+}
+
diff --git a/llvm/test/Transforms/LoopVectorize/PowerPC/small-loop-rdx.ll b/llvm/test/Transforms/LoopVectorize/PowerPC/small-loop-rdx.ll
new file mode 100644
index 00000000000..76864bc4629
--- /dev/null
+++ b/llvm/test/Transforms/LoopVectorize/PowerPC/small-loop-rdx.ll
@@ -0,0 +1,49 @@
+; RUN: opt < %s -loop-vectorize -S | FileCheck %s
+
+; CHECK: vector.body:
+; CHECK: fadd
+; CHECK-NEXT: fadd
+; CHECK-NEXT: fadd
+; CHECK-NEXT: fadd
+; CHECK-NEXT: fadd
+; CHECK-NEXT: fadd
+; CHECK-NEXT: fadd
+; CHECK-NEXT: fadd
+; CHECK-NEXT: fadd
+; CHECK-NEXT: fadd
+; CHECK-NEXT: fadd
+; CHECK-NEXT: fadd
+; CHECK-NOT: fadd
+; CHECK: middle.block
+
+target datalayout = "e-m:e-i64:64-n32:64"
+target triple = "powerpc64le-ibm-linux-gnu"
+
+define void @test(double* nocapture readonly %arr, i32 signext %len) {
+entry: ; fast-math sum of arr[0 .. len-1]; the loop is skipped when len <= 0
+  %cmp4 = icmp sgt i32 %len, 0
+  br i1 %cmp4, label %for.body.lr.ph, label %for.end
+
+for.body.lr.ph:                                   ; preds = %entry
+  %0 = add i32 %len, -1 ; index of the last element
+  br label %for.body
+
+for.body:                                         ; preds = %for.body, %for.body.lr.ph
+  %indvars.iv = phi i64 [ 0, %for.body.lr.ph ], [ %indvars.iv.next, %for.body ] ; i
+  %redx.05 = phi double [ 0.000000e+00, %for.body.lr.ph ], [ %add, %for.body ] ; running sum
+  %arrayidx = getelementptr inbounds double, double* %arr, i64 %indvars.iv
+  %1 = load double, double* %arrayidx, align 8
+  %add = fadd fast double %1, %redx.05 ; sum += arr[i]
+  %indvars.iv.next = add i64 %indvars.iv, 1
+  %lftr.wideiv = trunc i64 %indvars.iv to i32
+  %exitcond = icmp eq i32 %lftr.wideiv, %0 ; exit after processing i == len - 1
+  br i1 %exitcond, label %for.end.loopexit, label %for.body
+
+for.end.loopexit:                                 ; preds = %for.body
+  %add.lcssa = phi double [ %add, %for.body ]
+  br label %for.end
+
+for.end:                                          ; preds = %for.end.loopexit, %entry
+  %redx.0.lcssa = phi double [ 0.000000e+00, %entry ], [ %add.lcssa, %for.end.loopexit ] ; final sum; has no users
+  ret void
+}
diff --git a/llvm/test/Transforms/LoopVectorize/PowerPC/stride-vectorization.ll b/llvm/test/Transforms/LoopVectorize/PowerPC/stride-vectorization.ll
new file mode 100644
index 00000000000..f6f2609e8fb
--- /dev/null
+++ b/llvm/test/Transforms/LoopVectorize/PowerPC/stride-vectorization.ll
@@ -0,0 +1,36 @@
+; RUN: opt -S -basicaa -loop-vectorize < %s | FileCheck %s
+target datalayout = "E-m:e-i64:64-n32:64"
+target triple = "powerpc64-unknown-linux-gnu"
+
+; Function Attrs: nounwind
+define void @foo(double* noalias nocapture %a, double* noalias nocapture readonly %b) #0 {
+entry:
+  br label %for.body
+; Scalar input loop: a[i] = b[2*i] + b[2*i + 1] for i in [0, 1600).
+; CHECK-LABEL: @foo
+; CHECK: <2 x double>
+; The directive above only requires that some <2 x double> type appears in the output.
+for.cond.cleanup:                                 ; preds = %for.body
+  ret void
+
+for.body:                                         ; preds = %for.body, %entry
+  %indvars.iv = phi i64 [ 0, %entry ], [ %indvars.iv.next, %for.body ] ; i
+  %0 = shl nsw i64 %indvars.iv, 1 ; 2*i
+  %odd.idx = add nsw i64 %0, 1 ; 2*i + 1
+
+  %arrayidx = getelementptr inbounds double, double* %b, i64 %0
+  %arrayidx.odd = getelementptr inbounds double, double* %b, i64 %odd.idx
+
+  %1 = load double, double* %arrayidx, align 8 ; b[2*i]
+  %2 = load double, double* %arrayidx.odd, align 8 ; b[2*i + 1]
+
+  %add = fadd double %1, %2
+  %arrayidx2 = getelementptr inbounds double, double* %a, i64 %indvars.iv
+  store double %add, double* %arrayidx2, align 8 ; a[i] = b[2*i] + b[2*i + 1]
+  %indvars.iv.next = add nuw nsw i64 %indvars.iv, 1
+  %exitcond = icmp eq i64 %indvars.iv.next, 1600 ; fixed trip count of 1600
+  br i1 %exitcond, label %for.cond.cleanup, label %for.body
+}
+
+attributes #0 = { nounwind "target-cpu"="pwr8" }
+
diff --git a/llvm/test/Transforms/LoopVectorize/PowerPC/vectorize-only-for-real.ll b/llvm/test/Transforms/LoopVectorize/PowerPC/vectorize-only-for-real.ll
new file mode 100644
index 00000000000..8abc25ece35
--- /dev/null
+++ b/llvm/test/Transforms/LoopVectorize/PowerPC/vectorize-only-for-real.ll
@@ -0,0 +1,62 @@
+; RUN: opt -S -loop-vectorize < %s | FileCheck %s
+target datalayout = "E-m:e-i64:64-n32:64"
+target triple = "powerpc64-bgq-linux"
+
+; Function Attrs: nounwind
+define zeroext i32 @test() #0 {
+; CHECK-LABEL: @test
+; CHECK-NOT: x i32>
+; The directives above forbid any "x i32>" vector type after the @test label in the output.
+entry:
+  %a = alloca [1600 x i32], align 4
+  %c = alloca [1600 x i32], align 4
+  %0 = bitcast [1600 x i32]* %a to i8*
+  call void @llvm.lifetime.start(i64 6400, i8* %0) #3 ; 6400 bytes = 1600 x i32
+  br label %for.body
+
+for.cond.cleanup:                                 ; preds = %for.body
+  %1 = bitcast [1600 x i32]* %c to i8*
+  call void @llvm.lifetime.start(i64 6400, i8* %1) #3
+  %arraydecay = getelementptr inbounds [1600 x i32], [1600 x i32]* %a, i64 0, i64 0
+  %arraydecay1 = getelementptr inbounds [1600 x i32], [1600 x i32]* %c, i64 0, i64 0
+  %call = call signext i32 @bar(i32* %arraydecay, i32* %arraydecay1) #3 ; bar(a, c); the return value has no users
+  br label %for.body6
+
+for.body:                                         ; preds = %for.body, %entry
+  %indvars.iv25 = phi i64 [ 0, %entry ], [ %indvars.iv.next26, %for.body ] ; i for the first loop
+  %arrayidx = getelementptr inbounds [1600 x i32], [1600 x i32]* %a, i64 0, i64 %indvars.iv25
+  %2 = trunc i64 %indvars.iv25 to i32
+  store i32 %2, i32* %arrayidx, align 4 ; a[i] = i
+  %indvars.iv.next26 = add nuw nsw i64 %indvars.iv25, 1
+  %exitcond27 = icmp eq i64 %indvars.iv.next26, 1600
+  br i1 %exitcond27, label %for.cond.cleanup, label %for.body
+
+for.cond.cleanup5:                                ; preds = %for.body6
+  call void @llvm.lifetime.end(i64 6400, i8* nonnull %1) #3
+  call void @llvm.lifetime.end(i64 6400, i8* %0) #3
+  ret i32 %add ; returns the sum computed by the second loop
+
+for.body6:                                        ; preds = %for.body6, %for.cond.cleanup
+  %indvars.iv = phi i64 [ 0, %for.cond.cleanup ], [ %indvars.iv.next, %for.body6 ] ; i for the second loop
+  %s.022 = phi i32 [ 0, %for.cond.cleanup ], [ %add, %for.body6 ] ; running i32 sum
+  %arrayidx8 = getelementptr inbounds [1600 x i32], [1600 x i32]* %c, i64 0, i64 %indvars.iv
+  %3 = load i32, i32* %arrayidx8, align 4
+  %add = add i32 %3, %s.022 ; s += c[i]
+  %indvars.iv.next = add nuw nsw i64 %indvars.iv, 1
+  %exitcond = icmp eq i64 %indvars.iv.next, 1600
+  br i1 %exitcond, label %for.cond.cleanup5, label %for.body6
+}
+
+; Function Attrs: argmemonly nounwind
+declare void @llvm.lifetime.start(i64, i8* nocapture) #1
+
+; Function Attrs: argmemonly nounwind
+declare void @llvm.lifetime.end(i64, i8* nocapture) #1
+
+declare signext i32 @bar(i32*, i32*) #2
+
+attributes #0 = { nounwind "target-cpu"="a2q" "target-features"="+qpx,-altivec,-bpermd,-crypto,-direct-move,-extdiv,-power8-vector,-vsx" }
+attributes #1 = { argmemonly nounwind }
+attributes #2 = { "target-cpu"="a2q" "target-features"="+qpx,-altivec,-bpermd,-crypto,-direct-move,-extdiv,-power8-vector,-vsx" }
+attributes #3 = { nounwind }
+
diff --git a/llvm/test/Transforms/LoopVectorize/PowerPC/vsx-tsvc-s173.ll b/llvm/test/Transforms/LoopVectorize/PowerPC/vsx-tsvc-s173.ll
new file mode 100644
index 00000000000..15aec0d3539
--- /dev/null
+++ b/llvm/test/Transforms/LoopVectorize/PowerPC/vsx-tsvc-s173.ll
@@ -0,0 +1,51 @@
+; RUN: opt < %s -mcpu=pwr7 -mattr=+vsx -loop-vectorize -instcombine -S | FileCheck %s
+target datalayout = "E-m:e-i64:64-n32:64"
+target triple = "powerpc64-unknown-linux-gnu"
+
+%struct.GlobalData = type { [32000 x float], [3 x i32], [4 x i8], [32000 x float], [5 x i32], [12 x i8], [32000 x float], [7 x i32], [4 x i8], [32000 x float], [11 x i32], [4 x i8], [32000 x float], [13 x i32], [12 x i8], [256 x [256 x float]], [17 x i32], [12 x i8], [256 x [256 x float]], [19 x i32], [4 x i8], [256 x [256 x float]], [23 x i32], [4 x i8], [256 x [256 x float]] }
+
+@global_data = external global %struct.GlobalData, align 16
+@ntimes = external hidden unnamed_addr global i32, align 4
+
+define signext i32 @s173() #0 {
+entry: ; nested loops: the outer one repeats while nl < 10 * ntimes, the inner one runs 16000 iterations
+  %0 = load i32, i32* @ntimes, align 4
+  %cmp21 = icmp sgt i32 %0, 0
+  br i1 %cmp21, label %for.cond1.preheader, label %for.end12 ; nothing to do when ntimes <= 0
+
+for.cond1.preheader:                              ; preds = %for.end, %entry
+  %nl.022 = phi i32 [ %inc11, %for.end ], [ 0, %entry ] ; outer counter nl
+  br label %for.body3
+
+for.body3:                                        ; preds = %for.body3, %for.cond1.preheader
+  %indvars.iv = phi i64 [ 0, %for.cond1.preheader ], [ %indvars.iv.next, %for.body3 ] ; i
+  %arrayidx = getelementptr inbounds %struct.GlobalData, %struct.GlobalData* @global_data, i64 0, i32 0, i64 %indvars.iv
+  %1 = load float, float* %arrayidx, align 4 ; field 0, element i
+  %arrayidx5 = getelementptr inbounds %struct.GlobalData, %struct.GlobalData* @global_data, i64 0, i32 3, i64 %indvars.iv
+  %2 = load float, float* %arrayidx5, align 4 ; field 3, element i
+  %add = fadd float %1, %2
+  %3 = add nsw i64 %indvars.iv, 16000 ; i + 16000
+  %arrayidx8 = getelementptr inbounds %struct.GlobalData, %struct.GlobalData* @global_data, i64 0, i32 0, i64 %3
+  store float %add, float* %arrayidx8, align 4 ; field 0, element i + 16000 = sum of the two loads
+  %indvars.iv.next = add nuw nsw i64 %indvars.iv, 1
+  %exitcond = icmp eq i64 %indvars.iv.next, 16000
+  br i1 %exitcond, label %for.end, label %for.body3
+
+for.end:                                          ; preds = %for.body3
+  %inc11 = add nsw i32 %nl.022, 1
+  %4 = load i32, i32* @ntimes, align 4 ; ntimes is reloaded on every outer iteration
+  %mul = mul nsw i32 %4, 10
+  %cmp = icmp slt i32 %inc11, %mul
+  br i1 %cmp, label %for.cond1.preheader, label %for.end12
+
+for.end12:                                        ; preds = %for.end, %entry
+  ret i32 0
+
+; CHECK-LABEL: @s173
+; CHECK: load <4 x float>, <4 x float>*
+; CHECK: add nsw i64 %index, 16000
+; CHECK: ret i32 0
+}
+
+attributes #0 = { nounwind }
+