diff options
Diffstat (limited to 'llvm/test/Transforms')
-rw-r--r-- | llvm/test/Transforms/LoopVectorize/X86/vect.omp.force.small-tc.ll | 31 | ||||
-rw-r--r-- | llvm/test/Transforms/LoopVectorize/small-loop.ll | 6 |
2 files changed, 32 insertions, 5 deletions
diff --git a/llvm/test/Transforms/LoopVectorize/X86/vect.omp.force.small-tc.ll b/llvm/test/Transforms/LoopVectorize/X86/vect.omp.force.small-tc.ll index 8d139ac7e5a..46fd022af66 100644 --- a/llvm/test/Transforms/LoopVectorize/X86/vect.omp.force.small-tc.ll +++ b/llvm/test/Transforms/LoopVectorize/X86/vect.omp.force.small-tc.ll @@ -3,10 +3,11 @@ ; CHECK: LV: Loop hints: force=enabled ; CHECK: LV: Loop hints: force=? +; CHECK: LV: Loop hints: force=? ; No more loops in the module ; CHECK-NOT: LV: Loop hints: force= -; CHECK: 2 loop-vectorize - Number of loops analyzed for vectorization -; CHECK: 1 loop-vectorize - Number of loops vectorized +; CHECK: 3 loop-vectorize - Number of loops analyzed for vectorization +; CHECK: 2 loop-vectorize - Number of loops vectorized target datalayout = "e-p:64:64:64-i1:8:8-i8:8:8-i16:16:16-i32:32:32-i64:64:64-f32:32:32-f64:64:64-v64:64:64-v128:128:128-a0:0:64-s0:64:64-f80:128:128-n8:16:32:64-S128" target triple = "x86_64-apple-macosx10.8.0" @@ -71,3 +72,29 @@ for.end: !3 = !{!3} +; +; This loop will be vectorized as the trip count is below the threshold but no +; scalar iterations are needed. +; +define void @vectorized2(float* noalias nocapture %A, float* noalias nocapture readonly %B) { +entry: + br label %for.body + +for.body: + %indvars.iv = phi i64 [ 0, %entry ], [ %indvars.iv.next, %for.body ] + %arrayidx = getelementptr inbounds float, float* %B, i64 %indvars.iv + %0 = load float, float* %arrayidx, align 4, !llvm.mem.parallel_loop_access !3 + %arrayidx2 = getelementptr inbounds float, float* %A, i64 %indvars.iv + %1 = load float, float* %arrayidx2, align 4, !llvm.mem.parallel_loop_access !3 + %add = fadd fast float %0, %1 + store float %add, float* %arrayidx2, align 4, !llvm.mem.parallel_loop_access !3 + %indvars.iv.next = add nuw nsw i64 %indvars.iv, 1 + %exitcond = icmp eq i64 %indvars.iv.next, 16 + br i1 %exitcond, label %for.end, label %for.body, !llvm.loop !4 + +for.end: + ret void +} + +!4 = !{!4} + diff --git a/llvm/test/Transforms/LoopVectorize/small-loop.ll b/llvm/test/Transforms/LoopVectorize/small-loop.ll index 9a5dc4aa1b7..378283b464b 100644 --- a/llvm/test/Transforms/LoopVectorize/small-loop.ll +++ b/llvm/test/Transforms/LoopVectorize/small-loop.ll @@ -7,7 +7,7 @@ target datalayout = "e-p:64:64:64-i1:8:8-i8:8:8-i16:16:16-i32:32:32-i64:64:64-f3 @c = common global [2048 x i32] zeroinitializer, align 16 ;CHECK-LABEL: @example1( -;CHECK-NOT: load <4 x i32> +;CHECK: load <4 x i32> ;CHECK: ret void define void @example1() nounwind uwtable ssp { br label %1 @@ -23,8 +23,8 @@ define void @example1() nounwind uwtable ssp { store i32 %6, i32* %7, align 4 %indvars.iv.next = add i64 %indvars.iv, 1 %lftr.wideiv = trunc i64 %indvars.iv.next to i32 - %exitcond = icmp eq i32 %lftr.wideiv, 8 ; <----- A really small trip count. - br i1 %exitcond, label %8, label %1 + %exitcond = icmp eq i32 %lftr.wideiv, 8 ; <----- A really small trip count + br i1 %exitcond, label %8, label %1 ; w/o scalar iteration overhead. ; <label>:8 ; preds = %1 ret void |