diff options
Diffstat (limited to 'llvm/test')
8 files changed, 85 insertions, 20 deletions
diff --git a/llvm/test/Transforms/LoopVectorize/PowerPC/vsx-tsvc-s173.ll b/llvm/test/Transforms/LoopVectorize/PowerPC/vsx-tsvc-s173.ll index 65b3919585e..12a16018f58 100644 --- a/llvm/test/Transforms/LoopVectorize/PowerPC/vsx-tsvc-s173.ll +++ b/llvm/test/Transforms/LoopVectorize/PowerPC/vsx-tsvc-s173.ll @@ -43,7 +43,7 @@ for.end12: ; preds = %for.end, %entry ; CHECK-LABEL: @s173 ; CHECK: load <4 x float>, <4 x float>* -; CHECK: add i64 %index, 16000 +; CHECK: add nsw i64 %.lhs, 16000 ; CHECK: ret i32 0 } diff --git a/llvm/test/Transforms/LoopVectorize/X86/gather_scatter.ll b/llvm/test/Transforms/LoopVectorize/X86/gather_scatter.ll index 2f1de54d5f9..23e363eae02 100644 --- a/llvm/test/Transforms/LoopVectorize/X86/gather_scatter.ll +++ b/llvm/test/Transforms/LoopVectorize/X86/gather_scatter.ll @@ -95,7 +95,7 @@ for.end: ; preds = %for.cond %struct.In = type { float, float } ;AVX512-LABEL: @foo2 -;AVX512: getelementptr %struct.In, %struct.In* %in, <16 x i64> %induction, i32 1 +;AVX512: getelementptr %struct.In, %struct.In* %in, <16 x i64> %{{.*}}, i32 1 ;AVX512: llvm.masked.gather.v16f32 ;AVX512: llvm.masked.store.v16f32 ;AVX512: ret void @@ -170,10 +170,10 @@ for.end: ; preds = %for.cond ;} ;AVX512-LABEL: @foo3 -;AVX512: getelementptr %struct.In, %struct.In* %in, <16 x i64> %induction, i32 1 +;AVX512: getelementptr %struct.In, %struct.In* %in, <16 x i64> %{{.*}}, i32 1 ;AVX512: llvm.masked.gather.v16f32 ;AVX512: fadd <16 x float> -;AVX512: getelementptr %struct.Out, %struct.Out* %out, <16 x i64> %induction, i32 1 +;AVX512: getelementptr %struct.Out, %struct.Out* %out, <16 x i64> %{{.*}}, i32 1 ;AVX512: llvm.masked.scatter.v16f32 ;AVX512: ret void diff --git a/llvm/test/Transforms/LoopVectorize/cast-induction.ll b/llvm/test/Transforms/LoopVectorize/cast-induction.ll index fae89976a7b..54f68b7bd07 100644 --- a/llvm/test/Transforms/LoopVectorize/cast-induction.ll +++ b/llvm/test/Transforms/LoopVectorize/cast-induction.ll @@ -8,7 +8,7 @@ target triple = "x86_64-apple-macosx10.8.0" @a = common global [2048 x i32] zeroinitializer, align 16 ;CHECK-LABEL: @example12( -;CHECK: trunc i64 +;CHECK: %vec.ind1 = phi <4 x i32> ;CHECK: store <4 x i32> ;CHECK: ret void define void @example12() nounwind uwtable ssp { diff --git a/llvm/test/Transforms/LoopVectorize/gcc-examples.ll b/llvm/test/Transforms/LoopVectorize/gcc-examples.ll index 18809018615..95b0d16d57f 100644 --- a/llvm/test/Transforms/LoopVectorize/gcc-examples.ll +++ b/llvm/test/Transforms/LoopVectorize/gcc-examples.ll @@ -368,7 +368,7 @@ define void @example11() nounwind uwtable ssp { } ;CHECK-LABEL: @example12( -;CHECK: trunc i64 +;CHECK: %vec.ind1 = phi <4 x i32> ;CHECK: store <4 x i32> ;CHECK: ret void define void @example12() nounwind uwtable ssp { diff --git a/llvm/test/Transforms/LoopVectorize/gep_with_bitcast.ll b/llvm/test/Transforms/LoopVectorize/gep_with_bitcast.ll index ab2fd5e4e1c..fb12e172f54 100644 --- a/llvm/test/Transforms/LoopVectorize/gep_with_bitcast.ll +++ b/llvm/test/Transforms/LoopVectorize/gep_with_bitcast.ll @@ -12,10 +12,11 @@ target datalayout = "e-m:e-i64:64-i128:128-n32:64-S128" ; CHECK-LABEL: @foo ; CHECK: vector.body -; CHECK: %0 = getelementptr inbounds double*, double** %in, i64 %index -; CHECK: %1 = bitcast double** %0 to <4 x i64>* -; CHECK: %wide.load = load <4 x i64>, <4 x i64>* %1, align 8 -; CHECK: %2 = icmp eq <4 x i64> %wide.load, zeroinitializer +; CHECK: %0 = phi +; CHECK: %2 = getelementptr inbounds double*, double** %in, i64 %0 +; CHECK: %3 = bitcast double** %2 to <4 x i64>* +; CHECK: %wide.load = load <4 x i64>, <4 x i64>* %3, align 8 +; CHECK: %4 = icmp eq <4 x i64> %wide.load, zeroinitializer ; CHECK: br i1 define void @foo(double** noalias nocapture readonly %in, double** noalias nocapture readnone %out, i8* noalias nocapture %res) #0 { @@ -37,4 +38,4 @@ for.body: for.end: ret void -}
\ No newline at end of file +} diff --git a/llvm/test/Transforms/LoopVectorize/global_alias.ll b/llvm/test/Transforms/LoopVectorize/global_alias.ll index 84fa48cd514..0da841bcbbd 100644 --- a/llvm/test/Transforms/LoopVectorize/global_alias.ll +++ b/llvm/test/Transforms/LoopVectorize/global_alias.ll @@ -12,7 +12,7 @@ target datalayout = "e-p:32:32:32-i1:8:8-i8:8:8-i16:16:16-i32:32:32-i64:64:64-f3 @PA = external global i32* -;; === First, the tests that should always vectorize, wither statically or by adding run-time checks === +;; === First, the tests that should always vectorize, whether statically or by adding run-time checks === ; /// Different objects, positive induction, constant distance @@ -387,7 +387,7 @@ for.end: ; preds = %for.cond ; return Foo.A[a]; ; } ; CHECK-LABEL: define i32 @noAlias08( -; CHECK: sub <4 x i32> +; CHECK: sub nuw nsw <4 x i32> ; CHECK: ret define i32 @noAlias08(i32 %a) #0 { @@ -439,7 +439,7 @@ for.end: ; preds = %for.cond ; return Foo.A[a]; ; } ; CHECK-LABEL: define i32 @noAlias09( -; CHECK: sub <4 x i32> +; CHECK: sub nuw nsw <4 x i32> ; CHECK: ret define i32 @noAlias09(i32 %a) #0 { @@ -721,7 +721,7 @@ for.end: ; preds = %for.cond ; return Foo.A[a]; ; } ; CHECK-LABEL: define i32 @noAlias14( -; CHECK: sub <4 x i32> +; CHECK: sub nuw nsw <4 x i32> ; CHECK: ret define i32 @noAlias14(i32 %a) #0 { diff --git a/llvm/test/Transforms/LoopVectorize/induction.ll b/llvm/test/Transforms/LoopVectorize/induction.ll index 8e3cf365e83..c2d4d96153a 100644 --- a/llvm/test/Transforms/LoopVectorize/induction.ll +++ b/llvm/test/Transforms/LoopVectorize/induction.ll @@ -1,4 +1,6 @@ ; RUN: opt < %s -loop-vectorize -force-vector-interleave=1 -force-vector-width=2 -S | FileCheck %s +; RUN: opt < %s -loop-vectorize -force-vector-interleave=1 -force-vector-width=2 -instcombine -S | FileCheck %s --check-prefix=IND +; RUN: opt < %s -loop-vectorize -force-vector-interleave=2 -force-vector-width=2 -instcombine -S | FileCheck %s --check-prefix=UNROLL target datalayout = "e-p:64:64:64-i1:8:8-i8:8:8-i16:16:16-i32:32:32-i64:64:64-f32:32:32-f64:64:64-v64:64:64-v128:128:128-a0:0:64-s0:64:64-f80:128:128-n8:16:32:64-S128" @@ -27,8 +29,6 @@ for.end: ret void } -; RUN: opt < %s -loop-vectorize -force-vector-interleave=1 -force-vector-width=2 -instcombine -S | FileCheck %s --check-prefix=IND - ; Make sure we remove unneeded vectorization of induction variables. ; In order for instcombine to cleanup the vectorized induction variables that we ; create in the loop vectorizer we need to perform some form of redundancy @@ -241,3 +241,64 @@ entry: exit: ret void } + +; Check that we generate vectorized IVs in the pre-header +; instead of widening the scalar IV inside the loop, when +; we know how to do that. +; IND-LABEL: veciv +; IND: vector.body: +; IND: %index = phi i32 [ 0, %vector.ph ], [ %index.next, %vector.body ] +; IND: %vec.ind = phi <2 x i32> [ <i32 0, i32 1>, %vector.ph ], [ %step.add, %vector.body ] +; IND: %step.add = add <2 x i32> %vec.ind, <i32 2, i32 2> +; IND: %index.next = add i32 %index, 2 +; IND: %[[CMP:.*]] = icmp eq i32 %index.next +; IND: br i1 %[[CMP]] +; UNROLL-LABEL: veciv +; UNROLL: vector.body: +; UNROLL: %index = phi i32 [ 0, %vector.ph ], [ %index.next, %vector.body ] +; UNROLL: %vec.ind = phi <2 x i32> [ <i32 0, i32 1>, %vector.ph ], [ %step.add1, %vector.body ] +; UNROLL: %step.add = add <2 x i32> %vec.ind, <i32 2, i32 2> +; UNROLL: %step.add1 = add <2 x i32> %vec.ind, <i32 4, i32 4> +; UNROLL: %index.next = add i32 %index, 4 +; UNROLL: %[[CMP:.*]] = icmp eq i32 %index.next +; UNROLL: br i1 %[[CMP]] +define void @veciv(i32* nocapture %a, i32 %start, i32 %k) { +for.body.preheader: + br label %for.body + +for.body: + %indvars.iv = phi i32 [ %indvars.iv.next, %for.body ], [ 0, %for.body.preheader ] + %arrayidx = getelementptr inbounds i32, i32* %a, i32 %indvars.iv + store i32 %indvars.iv, i32* %arrayidx, align 4 + %indvars.iv.next = add nuw nsw i32 %indvars.iv, 1 + %exitcond = icmp eq i32 %indvars.iv.next, %k + br i1 %exitcond, label %exit, label %for.body + +exit: + ret void +} + +; IND-LABEL: trunciv +; IND: vector.body: +; IND: %index = phi i64 [ 0, %vector.ph ], [ %index.next, %vector.body ] +; IND: %[[VECIND:.*]] = phi <2 x i32> [ <i32 0, i32 1>, %vector.ph ], [ %[[STEPADD:.*]], %vector.body ] +; IND: %[[STEPADD]] = add <2 x i32> %[[VECIND]], <i32 2, i32 2> +; IND: %index.next = add i64 %index, 2 +; IND: %[[CMP:.*]] = icmp eq i64 %index.next +; IND: br i1 %[[CMP]] +define void @trunciv(i32* nocapture %a, i32 %start, i64 %k) { +for.body.preheader: + br label %for.body + +for.body: + %indvars.iv = phi i64 [ %indvars.iv.next, %for.body ], [ 0, %for.body.preheader ] + %trunc.iv = trunc i64 %indvars.iv to i32 + %arrayidx = getelementptr inbounds i32, i32* %a, i32 %trunc.iv + store i32 %trunc.iv, i32* %arrayidx, align 4 + %indvars.iv.next = add nuw nsw i64 %indvars.iv, 1 + %exitcond = icmp eq i64 %indvars.iv.next, %k + br i1 %exitcond, label %exit, label %for.body + +exit: + ret void +} diff --git a/llvm/test/Transforms/LoopVectorize/induction_plus.ll b/llvm/test/Transforms/LoopVectorize/induction_plus.ll index 7c4c8f2edcb..5e96d4196ca 100644 --- a/llvm/test/Transforms/LoopVectorize/induction_plus.ll +++ b/llvm/test/Transforms/LoopVectorize/induction_plus.ll @@ -1,4 +1,4 @@ -; RUN: opt < %s -loop-vectorize -force-vector-interleave=1 -force-vector-width=4 -instcombine -S | FileCheck %s +; RUN: opt < %s -loop-vectorize -force-vector-interleave=1 -force-vector-width=4 -S | FileCheck %s target datalayout = "e-p:64:64:64-i1:8:8-i8:8:8-i16:16:16-i32:32:32-i64:64:64-f32:32:32-f64:64:64-v64:64:64-v128:128:128-a0:0:64-s0:64:64-f80:128:128-n8:16:32:64-S128" target triple = "x86_64-apple-macosx10.8.0" @@ -6,8 +6,11 @@ target triple = "x86_64-apple-macosx10.8.0" @array = common global [1024 x i32] zeroinitializer, align 16 ;CHECK-LABEL: @array_at_plus_one( -;CHECK: add i64 %index, 12 -;CHECK: trunc i64 +;CHECK: %index = phi i64 [ 0, %vector.ph ], [ %index.next, %vector.body ] +;CHECK: %vec.ind = phi <4 x i64> [ <i64 0, i64 1, i64 2, i64 3>, %vector.ph ], [ %step.add, %vector.body ] +;CHECK: %vec.ind1 = phi <4 x i32> [ <i32 0, i32 1, i32 2, i32 3>, %vector.ph ], [ %step.add2, %vector.body ] +;CHECK: add <4 x i64> %vec.ind, <i64 4, i64 4, i64 4, i64 4> +;CHECK: add nsw <4 x i64> %vec.ind, <i64 12, i64 12, i64 12, i64 12> ;CHECK: ret i32 define i32 @array_at_plus_one(i32 %n) nounwind uwtable ssp { %1 = icmp sgt i32 %n, 0 |

