diff options
Diffstat (limited to 'llvm/test/Transforms/LoopVectorize')
-rw-r--r-- | llvm/test/Transforms/LoopVectorize/AArch64/first-order-recurrence.ll | 42 | ||||
-rwxr-xr-x | llvm/test/Transforms/LoopVectorize/X86/scatter_crash.ll | 32 |
2 files changed, 58 insertions, 16 deletions
diff --git a/llvm/test/Transforms/LoopVectorize/AArch64/first-order-recurrence.ll b/llvm/test/Transforms/LoopVectorize/AArch64/first-order-recurrence.ll index 5129568075f..fc68adb59df 100644 --- a/llvm/test/Transforms/LoopVectorize/AArch64/first-order-recurrence.ll +++ b/llvm/test/Transforms/LoopVectorize/AArch64/first-order-recurrence.ll @@ -1,5 +1,6 @@ ; RUN: opt < %s -loop-vectorize -force-vector-width=4 -force-vector-interleave=1 -dce -instcombine -S | FileCheck %s ; RUN: opt < %s -loop-vectorize -force-vector-width=4 -force-vector-interleave=2 -dce -instcombine -S | FileCheck %s --check-prefix=UNROLL +; RUN: opt < %s -loop-vectorize -force-vector-width=4 -force-vector-interleave=2 -S | FileCheck %s --check-prefix=UNROLL-NO-IC target datalayout = "e-m:e-i64:64-i128:128-n32:64-S128" @@ -297,3 +298,44 @@ for.cond.cleanup3: %exitcond = icmp eq i32 %inc, 49 br i1 %exitcond, label %for.cond.cleanup, label %for.cond1.preheader } + +; CHECK-LABEL: @PR29559 +; +; UNROLL-NO-IC: vector.ph: +; UNROLL-NO-IC: br label %vector.body +; +; UNROLL-NO-IC: vector.body: +; UNROLL-NO-IC: %index = phi i64 [ 0, %vector.ph ], [ %index.next, %vector.body ] +; UNROLL-NO-IC: %vector.recur = phi <4 x float*> [ undef, %vector.ph ], [ %[[I4:.+]], %vector.body ] +; UNROLL-NO-IC: %[[G1:.+]] = getelementptr inbounds [3 x float], [3 x float]* undef, i64 0, i64 0 +; UNROLL-NO-IC: %[[I1:.+]] = insertelement <4 x float*> undef, float* %[[G1]], i32 0 +; UNROLL-NO-IC: %[[I2:.+]] = insertelement <4 x float*> %[[I1]], float* %[[G1]], i32 1 +; UNROLL-NO-IC: %[[I3:.+]] = insertelement <4 x float*> %[[I2]], float* %[[G1]], i32 2 +; UNROLL-NO-IC: %[[I4]] = insertelement <4 x float*> %[[I3]], float* %[[G1]], i32 3 +; UNROLL-NO-IC: {{.*}} = shufflevector <4 x float*> %vector.recur, <4 x float*> %[[I4]], <4 x i32> <i32 3, i32 4, i32 5, i32 6> +; UNROLL-NO-IC: {{.*}} = shufflevector <4 x float*> %[[I4]], <4 x float*> %[[I4]], <4 x i32> <i32 3, i32 4, i32 5, i32 6> +; +; UNROLL-NO-IC: middle.block: +; UNROLL-NO-IC: %vector.recur.extract = extractelement <4 x float*> %[[I4]], i32 3 +; +; UNROLL-NO-IC: scalar.ph: +; UNROLL-NO-IC: %scalar.recur.init = phi float* [ %vector.recur.extract, %middle.block ], [ undef, %min.iters.checked ], [ undef, %entry ] +; +; UNROLL-NO-IC: scalar.body: +; UNROLL-NO-IC: %scalar.recur = phi float* [ %scalar.recur.init, %scalar.ph ], [ {{.*}}, %scalar.body ] +; +define void @PR29559() { +entry: + br label %scalar.body + +scalar.body: + %i = phi i64 [ 0, %entry ], [ %i.next, %scalar.body ] + %tmp2 = phi float* [ undef, %entry ], [ %tmp3, %scalar.body ] + %tmp3 = getelementptr inbounds [3 x float], [3 x float]* undef, i64 0, i64 0 + %i.next = add nuw nsw i64 %i, 1 + %cond = icmp eq i64 %i.next, undef + br i1 %cond, label %for.end, label %scalar.body + +for.end: + ret void +} diff --git a/llvm/test/Transforms/LoopVectorize/X86/scatter_crash.ll b/llvm/test/Transforms/LoopVectorize/X86/scatter_crash.ll index 5f165c52efc..ec67e632efb 100755 --- a/llvm/test/Transforms/LoopVectorize/X86/scatter_crash.ll +++ b/llvm/test/Transforms/LoopVectorize/X86/scatter_crash.ll @@ -54,6 +54,22 @@ define void @_Z3fn1v() #0 { ; CHECK-NEXT: [[TMP51:%.*]] = getelementptr inbounds [10 x [10 x i32]], [10 x [10 x i32]]* @d, i64 0, i64 [[IND26]] ; CHECK-NEXT: [[TMP54:%.*]] = getelementptr inbounds [10 x [10 x i32]], [10 x [10 x i32]]* @d, i64 0, i64 [[IND28]] ; CHECK-NEXT: [[TMP57:%.*]] = getelementptr inbounds [10 x [10 x i32]], [10 x [10 x i32]]* @d, i64 0, i64 [[IND30]] +; CHECK-NEXT: [[TMP13:%.*]] = insertelement <16 x [10 x i32]*> undef, [10 x i32]* [[TMP12]], i32 0 +; CHECK-NEXT: [[TMP16:%.*]] = insertelement <16 x [10 x i32]*> [[TMP13]], [10 x i32]* [[TMP15]], i32 1 +; CHECK-NEXT: [[TMP19:%.*]] = insertelement <16 x [10 x i32]*> [[TMP16]], [10 x i32]* [[TMP18]], i32 2 +; CHECK-NEXT: [[TMP22:%.*]] = insertelement <16 x [10 x i32]*> [[TMP19]], [10 x i32]* [[TMP21]], i32 3 +; CHECK-NEXT: [[TMP25:%.*]] = insertelement <16 x [10 x i32]*> [[TMP22]], [10 x i32]* [[TMP24]], i32 4 +; CHECK-NEXT: [[TMP28:%.*]] = insertelement <16 x [10 x i32]*> [[TMP25]], [10 x i32]* [[TMP27]], i32 5 +; CHECK-NEXT: [[TMP31:%.*]] = insertelement <16 x [10 x i32]*> [[TMP28]], [10 x i32]* [[TMP30]], i32 6 +; CHECK-NEXT: [[TMP34:%.*]] = insertelement <16 x [10 x i32]*> [[TMP31]], [10 x i32]* [[TMP33]], i32 7 +; CHECK-NEXT: [[TMP37:%.*]] = insertelement <16 x [10 x i32]*> [[TMP34]], [10 x i32]* [[TMP36]], i32 8 +; CHECK-NEXT: [[TMP40:%.*]] = insertelement <16 x [10 x i32]*> [[TMP37]], [10 x i32]* [[TMP39]], i32 9 +; CHECK-NEXT: [[TMP43:%.*]] = insertelement <16 x [10 x i32]*> [[TMP40]], [10 x i32]* [[TMP42]], i32 10 +; CHECK-NEXT: [[TMP46:%.*]] = insertelement <16 x [10 x i32]*> [[TMP43]], [10 x i32]* [[TMP45]], i32 11 +; CHECK-NEXT: [[TMP49:%.*]] = insertelement <16 x [10 x i32]*> [[TMP46]], [10 x i32]* [[TMP48]], i32 12 +; CHECK-NEXT: [[TMP52:%.*]] = insertelement <16 x [10 x i32]*> [[TMP49]], [10 x i32]* [[TMP51]], i32 13 +; CHECK-NEXT: [[TMP55:%.*]] = insertelement <16 x [10 x i32]*> [[TMP52]], [10 x i32]* [[TMP54]], i32 14 +; CHECK-NEXT: [[TMP58:%.*]] = insertelement <16 x [10 x i32]*> [[TMP55]], [10 x i32]* [[TMP57]], i32 15 ; CHECK-NEXT: [[TMP59:%.*]] = add nsw <16 x i64> [[TMP10]], [[VEC_IND3]] ; CHECK-NEXT: [[TMP61:%.*]] = extractelement <16 x i64> [[TMP59]], i32 0 ; CHECK-NEXT: [[TMP62:%.*]] = getelementptr inbounds [10 x i32], [10 x i32]* [[TMP12]], i64 [[TMP61]], i64 0 @@ -87,22 +103,6 @@ define void @_Z3fn1v() #0 { ; CHECK-NEXT: [[TMP118:%.*]] = getelementptr inbounds [10 x i32], [10 x i32]* [[TMP54]], i64 [[TMP117]], i64 0 ; CHECK-NEXT: [[TMP121:%.*]] = extractelement <16 x i64> [[TMP59]], i32 15 ; CHECK-NEXT: [[TMP122:%.*]] = getelementptr inbounds [10 x i32], [10 x i32]* [[TMP57]], i64 [[TMP121]], i64 0 -; CHECK-NEXT: [[TMP13:%.*]] = insertelement <16 x [10 x i32]*> undef, [10 x i32]* [[TMP12]], i32 0 -; CHECK-NEXT: [[TMP16:%.*]] = insertelement <16 x [10 x i32]*> [[TMP13]], [10 x i32]* [[TMP15]], i32 1 -; CHECK-NEXT: [[TMP19:%.*]] = insertelement <16 x [10 x i32]*> [[TMP16]], [10 x i32]* [[TMP18]], i32 2 -; CHECK-NEXT: [[TMP22:%.*]] = insertelement <16 x [10 x i32]*> [[TMP19]], [10 x i32]* [[TMP21]], i32 3 -; CHECK-NEXT: [[TMP25:%.*]] = insertelement <16 x [10 x i32]*> [[TMP22]], [10 x i32]* [[TMP24]], i32 4 -; CHECK-NEXT: [[TMP28:%.*]] = insertelement <16 x [10 x i32]*> [[TMP25]], [10 x i32]* [[TMP27]], i32 5 -; CHECK-NEXT: [[TMP31:%.*]] = insertelement <16 x [10 x i32]*> [[TMP28]], [10 x i32]* [[TMP30]], i32 6 -; CHECK-NEXT: [[TMP34:%.*]] = insertelement <16 x [10 x i32]*> [[TMP31]], [10 x i32]* [[TMP33]], i32 7 -; CHECK-NEXT: [[TMP37:%.*]] = insertelement <16 x [10 x i32]*> [[TMP34]], [10 x i32]* [[TMP36]], i32 8 -; CHECK-NEXT: [[TMP40:%.*]] = insertelement <16 x [10 x i32]*> [[TMP37]], [10 x i32]* [[TMP39]], i32 9 -; CHECK-NEXT: [[TMP43:%.*]] = insertelement <16 x [10 x i32]*> [[TMP40]], [10 x i32]* [[TMP42]], i32 10 -; CHECK-NEXT: [[TMP46:%.*]] = insertelement <16 x [10 x i32]*> [[TMP43]], [10 x i32]* [[TMP45]], i32 11 -; CHECK-NEXT: [[TMP49:%.*]] = insertelement <16 x [10 x i32]*> [[TMP46]], [10 x i32]* [[TMP48]], i32 12 -; CHECK-NEXT: [[TMP52:%.*]] = insertelement <16 x [10 x i32]*> [[TMP49]], [10 x i32]* [[TMP51]], i32 13 -; CHECK-NEXT: [[TMP55:%.*]] = insertelement <16 x [10 x i32]*> [[TMP52]], [10 x i32]* [[TMP54]], i32 14 -; CHECK-NEXT: [[TMP58:%.*]] = insertelement <16 x [10 x i32]*> [[TMP55]], [10 x i32]* [[TMP57]], i32 15 ; CHECK-NEXT: [[VECTORGEP:%.*]] = getelementptr inbounds [10 x i32], <16 x [10 x i32]*> [[TMP58]], <16 x i64> [[TMP59]], i64 0 ; CHECK-NEXT: call void @llvm.masked.scatter.v16i32(<16 x i32> <i32 8, i32 8, i32 8, i32 8, i32 8, i32 8, i32 8, i32 8, i32 8, i32 8, i32 8, i32 8, i32 8, i32 8, i32 8, i32 8>, <16 x i32*> [[VECTORGEP]], i32 16, <16 x i1> <i1 true, i1 true, i1 true, i1 true, i1 true, i1 true, i1 true, i1 true, i1 true, i1 true, i1 true, i1 true, i1 true, i1 true, i1 true, i1 true>) ; CHECK: [[STEP_ADD:%.*]] = add <16 x i64> [[VEC_IND]], <i64 32, i64 32, i64 32, i64 32, i64 32, i64 32, i64 32, i64 32, i64 32, i64 32, i64 32, i64 32, i64 32, i64 32, i64 32, i64 32> |