summary | refs | log | tree | commit | diff | stats
path: root/llvm
diff options
context:
space:
mode:
Diffstat (limited to 'llvm')
-rw-r--r-- llvm/lib/Transforms/Vectorize/LoopVectorize.cpp | 9
-rw-r--r-- llvm/test/Transforms/LoopVectorize/PowerPC/small-loop-rdx.ll | 4
-rw-r--r-- llvm/test/Transforms/LoopVectorize/PowerPC/vsx-tsvc-s173.ll | 2
-rw-r--r-- llvm/test/Transforms/LoopVectorize/global_alias.ll | 6
-rw-r--r-- llvm/test/Transforms/LoopVectorize/induction_plus.ll | 4
-rw-r--r-- llvm/test/Transforms/LoopVectorize/scalar_after_vectorization.ll | 74
6 files changed, 92 insertions, 7 deletions
diff --git a/llvm/lib/Transforms/Vectorize/LoopVectorize.cpp b/llvm/lib/Transforms/Vectorize/LoopVectorize.cpp
index 0b5d7351b60..06413546619 100644
--- a/llvm/lib/Transforms/Vectorize/LoopVectorize.cpp
+++ b/llvm/lib/Transforms/Vectorize/LoopVectorize.cpp
@@ -4506,6 +4506,15 @@ static bool mayDivideByZero(Instruction &I) {
void InnerLoopVectorizer::vectorizeBlockInLoop(BasicBlock *BB, PhiVector *PV) {
// For each instruction in the old loop.
for (Instruction &I : *BB) {
+
+ // Scalarize instructions that should remain scalar after vectorization.
+ if (!(isa<BranchInst>(&I) || isa<PHINode>(&I) ||
+ isa<DbgInfoIntrinsic>(&I)) &&
+ Legal->isScalarAfterVectorization(&I)) {
+ scalarizeInstruction(&I);
+ continue;
+ }
+
switch (I.getOpcode()) {
case Instruction::Br:
// Nothing to do for PHIs and BR, since we already took care of the
diff --git a/llvm/test/Transforms/LoopVectorize/PowerPC/small-loop-rdx.ll b/llvm/test/Transforms/LoopVectorize/PowerPC/small-loop-rdx.ll
index cdd5f042350..76864bc4629 100644
--- a/llvm/test/Transforms/LoopVectorize/PowerPC/small-loop-rdx.ll
+++ b/llvm/test/Transforms/LoopVectorize/PowerPC/small-loop-rdx.ll
@@ -1,5 +1,6 @@
; RUN: opt < %s -loop-vectorize -S | FileCheck %s
+; CHECK: vector.body:
; CHECK: fadd
; CHECK-NEXT: fadd
; CHECK-NEXT: fadd
@@ -12,9 +13,8 @@
; CHECK-NEXT: fadd
; CHECK-NEXT: fadd
; CHECK-NEXT: fadd
-; CHECK-NEXT: =
; CHECK-NOT: fadd
-; CHECK-SAME: >
+; CHECK: middle.block
target datalayout = "e-m:e-i64:64-n32:64"
target triple = "powerpc64le-ibm-linux-gnu"
diff --git a/llvm/test/Transforms/LoopVectorize/PowerPC/vsx-tsvc-s173.ll b/llvm/test/Transforms/LoopVectorize/PowerPC/vsx-tsvc-s173.ll
index 65b3919585e..15aec0d3539 100644
--- a/llvm/test/Transforms/LoopVectorize/PowerPC/vsx-tsvc-s173.ll
+++ b/llvm/test/Transforms/LoopVectorize/PowerPC/vsx-tsvc-s173.ll
@@ -43,7 +43,7 @@ for.end12: ; preds = %for.end, %entry
; CHECK-LABEL: @s173
; CHECK: load <4 x float>, <4 x float>*
-; CHECK: add i64 %index, 16000
+; CHECK: add nsw i64 %index, 16000
; CHECK: ret i32 0
}
diff --git a/llvm/test/Transforms/LoopVectorize/global_alias.ll b/llvm/test/Transforms/LoopVectorize/global_alias.ll
index 16f50951a15..7333af3b925 100644
--- a/llvm/test/Transforms/LoopVectorize/global_alias.ll
+++ b/llvm/test/Transforms/LoopVectorize/global_alias.ll
@@ -387,7 +387,7 @@ for.end: ; preds = %for.cond
; return Foo.A[a];
; }
; CHECK-LABEL: define i32 @noAlias08(
-; CHECK: sub <4 x i32>
+; CHECK: load <4 x i32>
; CHECK: ret
define i32 @noAlias08(i32 %a) #0 {
@@ -439,7 +439,7 @@ for.end: ; preds = %for.cond
; return Foo.A[a];
; }
; CHECK-LABEL: define i32 @noAlias09(
-; CHECK: sub <4 x i32>
+; CHECK: load <4 x i32>
; CHECK: ret
define i32 @noAlias09(i32 %a) #0 {
@@ -721,7 +721,7 @@ for.end: ; preds = %for.cond
; return Foo.A[a];
; }
; CHECK-LABEL: define i32 @noAlias14(
-; CHECK: sub <4 x i32>
+; CHECK: load <4 x i32>
; CHECK: ret
define i32 @noAlias14(i32 %a) #0 {
diff --git a/llvm/test/Transforms/LoopVectorize/induction_plus.ll b/llvm/test/Transforms/LoopVectorize/induction_plus.ll
index ce9abba41bd..b73158a8265 100644
--- a/llvm/test/Transforms/LoopVectorize/induction_plus.ll
+++ b/llvm/test/Transforms/LoopVectorize/induction_plus.ll
@@ -9,7 +9,9 @@ target triple = "x86_64-apple-macosx10.8.0"
;CHECK: %index = phi i64 [ 0, %vector.ph ], [ %index.next, %vector.body ]
;CHECK: %vec.ind = phi <4 x i64> [ <i64 0, i64 1, i64 2, i64 3>, %vector.ph ], [ %vec.ind.next, %vector.body ]
;CHECK: %vec.ind1 = phi <4 x i32> [ <i32 0, i32 1, i32 2, i32 3>, %vector.ph ], [ %vec.ind.next2, %vector.body ]
-;CHECK: add nsw <4 x i64> %vec.ind, <i64 12, i64 12, i64 12, i64 12>
+;CHECK: %[[T1:.+]] = add i64 %index, 0
+;CHECK: %[[T2:.+]] = add nsw i64 %[[T1]], 12
+;CHECK: getelementptr inbounds [1024 x i32], [1024 x i32]* @array, i64 0, i64 %[[T2]]
;CHECK: %vec.ind.next = add <4 x i64> %vec.ind, <i64 4, i64 4, i64 4, i64 4>
;CHECK: %vec.ind.next2 = add <4 x i32> %vec.ind1, <i32 4, i32 4, i32 4, i32 4>
;CHECK: ret i32
diff --git a/llvm/test/Transforms/LoopVectorize/scalar_after_vectorization.ll b/llvm/test/Transforms/LoopVectorize/scalar_after_vectorization.ll
new file mode 100644
index 00000000000..2c77bd09959
--- /dev/null
+++ b/llvm/test/Transforms/LoopVectorize/scalar_after_vectorization.ll
@@ -0,0 +1,74 @@
+; RUN: opt < %s -force-vector-width=4 -force-vector-interleave=2 -loop-vectorize -instcombine -S | FileCheck %s
+; RUN: opt < %s -force-vector-width=4 -force-vector-interleave=2 -loop-vectorize -S | FileCheck %s --check-prefix=NO-IC
+
+target datalayout = "e-m:e-i64:64-i128:128-n32:64-S128"
+
+; CHECK-LABEL: @scalar_after_vectorization_0
+;
+; CHECK: vector.body:
+; CHECK: %index = phi i64 [ 0, %vector.ph ], [ %index.next, %vector.body ]
+; CHECK: %offset.idx = or i64 %index, 1
+; CHECK: %[[T2:.+]] = add nuw nsw i64 %offset.idx, %tmp0
+; CHECK: %[[T3:.+]] = sub nsw i64 %[[T2]], %x
+; CHECK: %[[T4:.+]] = getelementptr inbounds i32, i32* %a, i64 %[[T3]]
+; CHECK: %[[T5:.+]] = bitcast i32* %[[T4]] to <4 x i32>*
+; CHECK: load <4 x i32>, <4 x i32>* %[[T5]], align 4
+; CHECK: %[[T6:.+]] = getelementptr i32, i32* %[[T4]], i64 4
+; CHECK: %[[T7:.+]] = bitcast i32* %[[T6]] to <4 x i32>*
+; CHECK: load <4 x i32>, <4 x i32>* %[[T7]], align 4
+; CHECK: br {{.*}}, label %middle.block, label %vector.body
+;
+; NO-IC-LABEL: @scalar_after_vectorization_0
+;
+; NO-IC: vector.body:
+; NO-IC: %index = phi i64 [ 0, %vector.ph ], [ %index.next, %vector.body ]
+; NO-IC: %offset.idx = add i64 1, %index
+; NO-IC: %[[T2:.+]] = add i64 %offset.idx, 0
+; NO-IC: %[[T3:.+]] = add i64 %offset.idx, 4
+; NO-IC: %[[T4:.+]] = add nuw nsw i64 %[[T2]], %tmp0
+; NO-IC: %[[T5:.+]] = add nuw nsw i64 %[[T3]], %tmp0
+; NO-IC: %[[T6:.+]] = sub nsw i64 %[[T4]], %x
+; NO-IC: %[[T7:.+]] = sub nsw i64 %[[T5]], %x
+; NO-IC: %[[T8:.+]] = getelementptr inbounds i32, i32* %a, i64 %[[T6]]
+; NO-IC: %[[T9:.+]] = getelementptr inbounds i32, i32* %a, i64 %[[T7]]
+; NO-IC: %[[T10:.+]] = getelementptr i32, i32* %[[T8]], i32 0
+; NO-IC: %[[T11:.+]] = bitcast i32* %[[T10]] to <4 x i32>*
+; NO-IC: load <4 x i32>, <4 x i32>* %[[T11]], align 4
+; NO-IC: %[[T12:.+]] = getelementptr i32, i32* %[[T8]], i32 4
+; NO-IC: %[[T13:.+]] = bitcast i32* %[[T12]] to <4 x i32>*
+; NO-IC: load <4 x i32>, <4 x i32>* %[[T13]], align 4
+; NO-IC: br {{.*}}, label %middle.block, label %vector.body
+;
+define void @scalar_after_vectorization_0(i32* noalias %a, i32* noalias %b, i64 %x, i64 %y) {
+
+outer.ph:
+ br label %outer.body
+
+outer.body:
+ %i = phi i64 [ 1, %outer.ph ], [ %i.next, %inner.end ]
+ %tmp0 = mul nuw nsw i64 %i, %x
+ br label %inner.ph
+
+inner.ph:
+ br label %inner.body
+
+inner.body:
+ %j = phi i64 [ 1, %inner.ph ], [ %j.next, %inner.body ]
+ %tmp1 = add nuw nsw i64 %j, %tmp0
+ %tmp2 = sub nsw i64 %tmp1, %x
+ %tmp3 = getelementptr inbounds i32, i32* %a, i64 %tmp2
+ %tmp4 = load i32, i32* %tmp3, align 4
+ %tmp5 = getelementptr inbounds i32, i32* %b, i64 %tmp1
+ store i32 %tmp4, i32* %tmp5, align 4
+ %j.next = add i64 %j, 1
+ %cond.j = icmp slt i64 %j.next, %y
+ br i1 %cond.j, label %inner.body, label %inner.end
+
+inner.end:
+ %i.next = add i64 %i, 1
+ %cond.i = icmp slt i64 %i.next, %y
+ br i1 %cond.i, label %outer.body, label %outer.end
+
+outer.end:
+ ret void
+}
OpenPOWER on IntegriCloud