author     Sanjay Patel <spatel@rotateright.com>    2017-07-08 16:10:42 +0000
committer  Sanjay Patel <spatel@rotateright.com>    2017-07-08 16:10:42 +0000
commit     28f53ef44da2886baa48f7bcd3c0e3daae4acdfb (patch)
tree       9dd9f79a22e24081ec381b692ff4fc530dfb1270
parent     79bf29ccb73df8621c492da71a1757877f429a1e (diff)
[LoopVectorize] auto-generate complete checks; NFC
I'm looking at a cmp transform in InstCombine that would affect these tests, but it's hard to know if it makes things better or worse without seeing the full IR. OTOH, maybe these tests shouldn't be running a bunch of transform passes in the first place?

llvm-svn: 307475
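For reference, checks like the ones added below are typically regenerated by running the script named in the NOTE line over the test files with a locally built opt; the sketch below assumes a monorepo-style checkout and an illustrative build path, so adjust both to the local setup:

    # Regenerate the full FileCheck assertions for both tests
    # (the build/bin path is an assumption about where opt was built).
    llvm/utils/update_test_checks.py --opt-binary=build/bin/opt \
        llvm/test/Transforms/LoopVectorize/if-conversion-nest.ll \
        llvm/test/Transforms/LoopVectorize/if-conversion.ll

Because the regenerated CHECK lines cover the complete output of each RUN line, a later change in InstCombine or another pass in the pipeline shows up directly in the test diff.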
-rw-r--r--  llvm/test/Transforms/LoopVectorize/if-conversion-nest.ll   95
-rw-r--r--  llvm/test/Transforms/LoopVectorize/if-conversion.ll       213
2 files changed, 279 insertions(+), 29 deletions(-)
diff --git a/llvm/test/Transforms/LoopVectorize/if-conversion-nest.ll b/llvm/test/Transforms/LoopVectorize/if-conversion-nest.ll
index 3a581ebf847..7f381ae6ad7 100644
--- a/llvm/test/Transforms/LoopVectorize/if-conversion-nest.ll
+++ b/llvm/test/Transforms/LoopVectorize/if-conversion-nest.ll
@@ -1,18 +1,92 @@
+; NOTE: Assertions have been autogenerated by utils/update_test_checks.py
; RUN: opt < %s -loop-vectorize -force-vector-interleave=1 -force-vector-width=4 -enable-if-conversion -dce -instcombine -S | FileCheck %s
target datalayout = "e-p:64:64:64-i1:8:8-i8:8:8-i16:16:16-i32:32:32-i64:64:64-f32:32:32-f64:64:64-v64:64:64-v128:128:128-a0:0:64-s0:64:64-f80:128:128-n8:16:32:64-S128"
-;CHECK-LABEL: @foo(
-;CHECK: icmp sgt
-;CHECK: icmp sgt
-;CHECK: icmp slt
-;CHECK: select <4 x i1>
-;CHECK: %[[P1:.*]] = select <4 x i1>
-;CHECK: xor <4 x i1>
-;CHECK: and <4 x i1>
-;CHECK: select <4 x i1> %{{.*}}, <4 x i32> %{{.*}}, <4 x i32> %[[P1]]
-;CHECK: ret
define i32 @foo(i32* nocapture %A, i32* nocapture %B, i32 %n) {
+; CHECK-LABEL: @foo(
+; CHECK-NEXT: entry:
+; CHECK-NEXT: [[CMP26:%.*]] = icmp sgt i32 [[N:%.*]], 0
+; CHECK-NEXT: br i1 [[CMP26]], label [[FOR_BODY_PREHEADER:%.*]], label [[FOR_END:%.*]]
+; CHECK: for.body.preheader:
+; CHECK-NEXT: [[TMP0:%.*]] = add i32 [[N]], -1
+; CHECK-NEXT: [[TMP1:%.*]] = zext i32 [[TMP0]] to i64
+; CHECK-NEXT: [[TMP2:%.*]] = add nuw nsw i64 [[TMP1]], 1
+; CHECK-NEXT: [[MIN_ITERS_CHECK:%.*]] = icmp ult i64 [[TMP2]], 4
+; CHECK-NEXT: br i1 [[MIN_ITERS_CHECK]], label [[SCALAR_PH:%.*]], label [[MIN_ITERS_CHECKED:%.*]]
+; CHECK: min.iters.checked:
+; CHECK-NEXT: [[TMP3:%.*]] = and i32 [[N]], 3
+; CHECK-NEXT: [[N_MOD_VF:%.*]] = zext i32 [[TMP3]] to i64
+; CHECK-NEXT: [[N_VEC:%.*]] = sub nsw i64 [[TMP2]], [[N_MOD_VF]]
+; CHECK-NEXT: [[CMP_ZERO:%.*]] = icmp eq i64 [[N_VEC]], 0
+; CHECK-NEXT: br i1 [[CMP_ZERO]], label [[SCALAR_PH]], label [[VECTOR_MEMCHECK:%.*]]
+; CHECK: vector.memcheck:
+; CHECK-NEXT: [[TMP4:%.*]] = add i32 [[N]], -1
+; CHECK-NEXT: [[TMP5:%.*]] = zext i32 [[TMP4]] to i64
+; CHECK-NEXT: [[TMP6:%.*]] = add nuw nsw i64 [[TMP5]], 1
+; CHECK-NEXT: [[SCEVGEP:%.*]] = getelementptr i32, i32* [[A:%.*]], i64 [[TMP6]]
+; CHECK-NEXT: [[SCEVGEP4:%.*]] = getelementptr i32, i32* [[B:%.*]], i64 [[TMP6]]
+; CHECK-NEXT: [[BOUND0:%.*]] = icmp ugt i32* [[SCEVGEP4]], [[A]]
+; CHECK-NEXT: [[BOUND1:%.*]] = icmp ugt i32* [[SCEVGEP]], [[B]]
+; CHECK-NEXT: [[MEMCHECK_CONFLICT:%.*]] = and i1 [[BOUND0]], [[BOUND1]]
+; CHECK-NEXT: br i1 [[MEMCHECK_CONFLICT]], label [[SCALAR_PH]], label [[VECTOR_PH:%.*]]
+; CHECK: vector.ph:
+; CHECK-NEXT: br label [[VECTOR_BODY:%.*]]
+; CHECK: vector.body:
+; CHECK-NEXT: [[INDEX:%.*]] = phi i64 [ 0, [[VECTOR_PH]] ], [ [[INDEX_NEXT:%.*]], [[VECTOR_BODY]] ]
+; CHECK-NEXT: [[TMP7:%.*]] = getelementptr inbounds i32, i32* [[A]], i64 [[INDEX]]
+; CHECK-NEXT: [[TMP8:%.*]] = bitcast i32* [[TMP7]] to <4 x i32>*
+; CHECK-NEXT: [[WIDE_LOAD:%.*]] = load <4 x i32>, <4 x i32>* [[TMP8]], align 4, !alias.scope !0, !noalias !3
+; CHECK-NEXT: [[TMP9:%.*]] = getelementptr inbounds i32, i32* [[B]], i64 [[INDEX]]
+; CHECK-NEXT: [[TMP10:%.*]] = bitcast i32* [[TMP9]] to <4 x i32>*
+; CHECK-NEXT: [[WIDE_LOAD6:%.*]] = load <4 x i32>, <4 x i32>* [[TMP10]], align 4, !alias.scope !3
+; CHECK-NEXT: [[TMP11:%.*]] = icmp sgt <4 x i32> [[WIDE_LOAD]], [[WIDE_LOAD6]]
+; CHECK-NEXT: [[TMP12:%.*]] = icmp sgt <4 x i32> [[WIDE_LOAD]], <i32 19, i32 19, i32 19, i32 19>
+; CHECK-NEXT: [[TMP13:%.*]] = icmp slt <4 x i32> [[WIDE_LOAD6]], <i32 4, i32 4, i32 4, i32 4>
+; CHECK-NEXT: [[TMP14:%.*]] = select <4 x i1> [[TMP13]], <4 x i32> <i32 4, i32 4, i32 4, i32 4>, <4 x i32> <i32 5, i32 5, i32 5, i32 5>
+; CHECK-NEXT: [[TMP15:%.*]] = and <4 x i1> [[TMP12]], [[TMP11]]
+; CHECK-NEXT: [[PREDPHI:%.*]] = select <4 x i1> [[TMP15]], <4 x i32> <i32 3, i32 3, i32 3, i32 3>, <4 x i32> <i32 9, i32 9, i32 9, i32 9>
+; CHECK-NEXT: [[TMP16:%.*]] = xor <4 x i1> [[TMP12]], <i1 true, i1 true, i1 true, i1 true>
+; CHECK-NEXT: [[TMP17:%.*]] = and <4 x i1> [[TMP11]], [[TMP16]]
+; CHECK-NEXT: [[PREDPHI7:%.*]] = select <4 x i1> [[TMP17]], <4 x i32> [[TMP14]], <4 x i32> [[PREDPHI]]
+; CHECK-NEXT: [[TMP18:%.*]] = bitcast i32* [[TMP7]] to <4 x i32>*
+; CHECK-NEXT: store <4 x i32> [[PREDPHI7]], <4 x i32>* [[TMP18]], align 4, !alias.scope !0, !noalias !3
+; CHECK-NEXT: [[INDEX_NEXT]] = add i64 [[INDEX]], 4
+; CHECK-NEXT: [[TMP19:%.*]] = icmp eq i64 [[INDEX_NEXT]], [[N_VEC]]
+; CHECK-NEXT: br i1 [[TMP19]], label [[MIDDLE_BLOCK:%.*]], label [[VECTOR_BODY]], !llvm.loop !5
+; CHECK: middle.block:
+; CHECK-NEXT: [[CMP_N:%.*]] = icmp eq i32 [[TMP3]], 0
+; CHECK-NEXT: br i1 [[CMP_N]], label [[FOR_END_LOOPEXIT:%.*]], label [[SCALAR_PH]]
+; CHECK: scalar.ph:
+; CHECK-NEXT: [[BC_RESUME_VAL:%.*]] = phi i64 [ [[N_VEC]], [[MIDDLE_BLOCK]] ], [ 0, [[FOR_BODY_PREHEADER]] ], [ 0, [[MIN_ITERS_CHECKED]] ], [ 0, [[VECTOR_MEMCHECK]] ]
+; CHECK-NEXT: br label [[FOR_BODY:%.*]]
+; CHECK: for.body:
+; CHECK-NEXT: [[INDVARS_IV:%.*]] = phi i64 [ [[INDVARS_IV_NEXT:%.*]], [[IF_END14:%.*]] ], [ [[BC_RESUME_VAL]], [[SCALAR_PH]] ]
+; CHECK-NEXT: [[ARRAYIDX:%.*]] = getelementptr inbounds i32, i32* [[A]], i64 [[INDVARS_IV]]
+; CHECK-NEXT: [[TMP20:%.*]] = load i32, i32* [[ARRAYIDX]], align 4
+; CHECK-NEXT: [[ARRAYIDX2:%.*]] = getelementptr inbounds i32, i32* [[B]], i64 [[INDVARS_IV]]
+; CHECK-NEXT: [[TMP21:%.*]] = load i32, i32* [[ARRAYIDX2]], align 4
+; CHECK-NEXT: [[CMP3:%.*]] = icmp sgt i32 [[TMP20]], [[TMP21]]
+; CHECK-NEXT: br i1 [[CMP3]], label [[IF_THEN:%.*]], label [[IF_END14]]
+; CHECK: if.then:
+; CHECK-NEXT: [[CMP6:%.*]] = icmp sgt i32 [[TMP20]], 19
+; CHECK-NEXT: br i1 [[CMP6]], label [[IF_END14]], label [[IF_ELSE:%.*]]
+; CHECK: if.else:
+; CHECK-NEXT: [[CMP10:%.*]] = icmp slt i32 [[TMP21]], 4
+; CHECK-NEXT: [[DOT:%.*]] = select i1 [[CMP10]], i32 4, i32 5
+; CHECK-NEXT: br label [[IF_END14]]
+; CHECK: if.end14:
+; CHECK-NEXT: [[X_0:%.*]] = phi i32 [ 9, [[FOR_BODY]] ], [ 3, [[IF_THEN]] ], [ [[DOT]], [[IF_ELSE]] ]
+; CHECK-NEXT: store i32 [[X_0]], i32* [[ARRAYIDX]], align 4
+; CHECK-NEXT: [[INDVARS_IV_NEXT]] = add i64 [[INDVARS_IV]], 1
+; CHECK-NEXT: [[LFTR_WIDEIV:%.*]] = trunc i64 [[INDVARS_IV_NEXT]] to i32
+; CHECK-NEXT: [[EXITCOND:%.*]] = icmp eq i32 [[LFTR_WIDEIV]], [[N]]
+; CHECK-NEXT: br i1 [[EXITCOND]], label [[FOR_END_LOOPEXIT]], label [[FOR_BODY]], !llvm.loop !8
+; CHECK: for.end.loopexit:
+; CHECK-NEXT: br label [[FOR_END]]
+; CHECK: for.end:
+; CHECK-NEXT: ret i32 undef
+;
entry:
%cmp26 = icmp sgt i32 %n, 0
br i1 %cmp26, label %for.body, label %for.end
@@ -46,3 +120,4 @@ if.end14:
for.end:
ret i32 undef
}
+
diff --git a/llvm/test/Transforms/LoopVectorize/if-conversion.ll b/llvm/test/Transforms/LoopVectorize/if-conversion.ll
index ad50e0b00fc..9291faca19c 100644
--- a/llvm/test/Transforms/LoopVectorize/if-conversion.ll
+++ b/llvm/test/Transforms/LoopVectorize/if-conversion.ll
@@ -1,3 +1,4 @@
+; NOTE: Assertions have been autogenerated by utils/update_test_checks.py
; RUN: opt < %s -loop-vectorize -force-vector-interleave=1 -force-vector-width=4 -enable-if-conversion -dce -instcombine -S | FileCheck %s
target datalayout = "e-p:64:64:64-i1:8:8-i8:8:8-i16:16:16-i32:32:32-i64:64:64-f32:32:32-f64:64:64-v64:64:64-v128:128:128-a0:0:64-s0:64:64-f80:128:128-n8:16:32:64-S128"
@@ -16,14 +17,91 @@ target datalayout = "e-p:64:64:64-i1:8:8-i8:8:8-i16:16:16-i32:32:32-i64:64:64-f3
; }
;}
-;CHECK-LABEL: @function0(
-;CHECK: load <4 x i32>
-;CHECK: icmp sgt <4 x i32>
-;CHECK: mul <4 x i32>
-;CHECK: add <4 x i32>
-;CHECK: select <4 x i1>
-;CHECK: ret i32
define i32 @function0(i32* nocapture %a, i32* nocapture %b, i32 %start, i32 %end) nounwind uwtable ssp {
+; CHECK-LABEL: @function0(
+; CHECK-NEXT: entry:
+; CHECK-NEXT: [[CMP16:%.*]] = icmp slt i32 [[START:%.*]], [[END:%.*]]
+; CHECK-NEXT: br i1 [[CMP16]], label [[FOR_BODY_LR_PH:%.*]], label [[FOR_END:%.*]]
+; CHECK: for.body.lr.ph:
+; CHECK-NEXT: [[TMP0:%.*]] = sext i32 [[START]] to i64
+; CHECK-NEXT: [[TMP1:%.*]] = add i32 [[END]], -1
+; CHECK-NEXT: [[TMP2:%.*]] = sub i32 [[TMP1]], [[START]]
+; CHECK-NEXT: [[TMP3:%.*]] = zext i32 [[TMP2]] to i64
+; CHECK-NEXT: [[TMP4:%.*]] = add nuw nsw i64 [[TMP3]], 1
+; CHECK-NEXT: [[MIN_ITERS_CHECK:%.*]] = icmp ult i64 [[TMP4]], 4
+; CHECK-NEXT: br i1 [[MIN_ITERS_CHECK]], label [[SCALAR_PH:%.*]], label [[MIN_ITERS_CHECKED:%.*]]
+; CHECK: min.iters.checked:
+; CHECK-NEXT: [[TMP5:%.*]] = add i32 [[TMP2]], 1
+; CHECK-NEXT: [[TMP6:%.*]] = and i32 [[TMP5]], 3
+; CHECK-NEXT: [[N_MOD_VF:%.*]] = zext i32 [[TMP6]] to i64
+; CHECK-NEXT: [[N_VEC:%.*]] = sub nsw i64 [[TMP4]], [[N_MOD_VF]]
+; CHECK-NEXT: [[CMP_ZERO:%.*]] = icmp eq i64 [[N_VEC]], 0
+; CHECK-NEXT: br i1 [[CMP_ZERO]], label [[SCALAR_PH]], label [[VECTOR_MEMCHECK:%.*]]
+; CHECK: vector.memcheck:
+; CHECK-NEXT: [[SCEVGEP:%.*]] = getelementptr i32, i32* [[A:%.*]], i64 [[TMP0]]
+; CHECK-NEXT: [[TMP7:%.*]] = add i32 [[END]], -1
+; CHECK-NEXT: [[TMP8:%.*]] = sub i32 [[TMP7]], [[START]]
+; CHECK-NEXT: [[TMP9:%.*]] = zext i32 [[TMP8]] to i64
+; CHECK-NEXT: [[TMP10:%.*]] = add nsw i64 [[TMP0]], [[TMP9]]
+; CHECK-NEXT: [[TMP11:%.*]] = add nsw i64 [[TMP10]], 1
+; CHECK-NEXT: [[SCEVGEP2:%.*]] = getelementptr i32, i32* [[A]], i64 [[TMP11]]
+; CHECK-NEXT: [[SCEVGEP4:%.*]] = getelementptr i32, i32* [[B:%.*]], i64 [[TMP0]]
+; CHECK-NEXT: [[SCEVGEP6:%.*]] = getelementptr i32, i32* [[B]], i64 [[TMP11]]
+; CHECK-NEXT: [[BOUND0:%.*]] = icmp ult i32* [[SCEVGEP]], [[SCEVGEP6]]
+; CHECK-NEXT: [[BOUND1:%.*]] = icmp ult i32* [[SCEVGEP4]], [[SCEVGEP2]]
+; CHECK-NEXT: [[MEMCHECK_CONFLICT:%.*]] = and i1 [[BOUND0]], [[BOUND1]]
+; CHECK-NEXT: [[IND_END:%.*]] = add nsw i64 [[N_VEC]], [[TMP0]]
+; CHECK-NEXT: br i1 [[MEMCHECK_CONFLICT]], label [[SCALAR_PH]], label [[VECTOR_PH:%.*]]
+; CHECK: vector.ph:
+; CHECK-NEXT: br label [[VECTOR_BODY:%.*]]
+; CHECK: vector.body:
+; CHECK-NEXT: [[INDEX:%.*]] = phi i64 [ 0, [[VECTOR_PH]] ], [ [[INDEX_NEXT:%.*]], [[VECTOR_BODY]] ]
+; CHECK-NEXT: [[TMP12:%.*]] = add i64 [[INDEX]], [[TMP0]]
+; CHECK-NEXT: [[TMP13:%.*]] = getelementptr inbounds i32, i32* [[A]], i64 [[TMP12]]
+; CHECK-NEXT: [[TMP14:%.*]] = bitcast i32* [[TMP13]] to <4 x i32>*
+; CHECK-NEXT: [[WIDE_LOAD:%.*]] = load <4 x i32>, <4 x i32>* [[TMP14]], align 4, !alias.scope !0, !noalias !3
+; CHECK-NEXT: [[TMP15:%.*]] = getelementptr inbounds i32, i32* [[B]], i64 [[TMP12]]
+; CHECK-NEXT: [[TMP16:%.*]] = bitcast i32* [[TMP15]] to <4 x i32>*
+; CHECK-NEXT: [[WIDE_LOAD8:%.*]] = load <4 x i32>, <4 x i32>* [[TMP16]], align 4, !alias.scope !3
+; CHECK-NEXT: [[TMP17:%.*]] = mul <4 x i32> [[WIDE_LOAD]], <i32 5, i32 5, i32 5, i32 5>
+; CHECK-NEXT: [[TMP18:%.*]] = add <4 x i32> [[TMP17]], <i32 3, i32 3, i32 3, i32 3>
+; CHECK-NEXT: [[TMP19:%.*]] = icmp sgt <4 x i32> [[WIDE_LOAD]], [[WIDE_LOAD8]]
+; CHECK-NEXT: [[PREDPHI:%.*]] = select <4 x i1> [[TMP19]], <4 x i32> [[TMP18]], <4 x i32> [[WIDE_LOAD]]
+; CHECK-NEXT: [[TMP20:%.*]] = bitcast i32* [[TMP13]] to <4 x i32>*
+; CHECK-NEXT: store <4 x i32> [[PREDPHI]], <4 x i32>* [[TMP20]], align 4, !alias.scope !0, !noalias !3
+; CHECK-NEXT: [[INDEX_NEXT]] = add i64 [[INDEX]], 4
+; CHECK-NEXT: [[TMP21:%.*]] = icmp eq i64 [[INDEX_NEXT]], [[N_VEC]]
+; CHECK-NEXT: br i1 [[TMP21]], label [[MIDDLE_BLOCK:%.*]], label [[VECTOR_BODY]], !llvm.loop !5
+; CHECK: middle.block:
+; CHECK-NEXT: [[CMP_N:%.*]] = icmp eq i32 [[TMP6]], 0
+; CHECK-NEXT: br i1 [[CMP_N]], label [[FOR_END_LOOPEXIT:%.*]], label [[SCALAR_PH]]
+; CHECK: scalar.ph:
+; CHECK-NEXT: [[BC_RESUME_VAL:%.*]] = phi i64 [ [[IND_END]], [[MIDDLE_BLOCK]] ], [ [[TMP0]], [[FOR_BODY_LR_PH]] ], [ [[TMP0]], [[MIN_ITERS_CHECKED]] ], [ [[TMP0]], [[VECTOR_MEMCHECK]] ]
+; CHECK-NEXT: br label [[FOR_BODY:%.*]]
+; CHECK: for.body:
+; CHECK-NEXT: [[INDVARS_IV:%.*]] = phi i64 [ [[BC_RESUME_VAL]], [[SCALAR_PH]] ], [ [[INDVARS_IV_NEXT:%.*]], [[IF_END:%.*]] ]
+; CHECK-NEXT: [[ARRAYIDX:%.*]] = getelementptr inbounds i32, i32* [[A]], i64 [[INDVARS_IV]]
+; CHECK-NEXT: [[TMP22:%.*]] = load i32, i32* [[ARRAYIDX]], align 4
+; CHECK-NEXT: [[ARRAYIDX4:%.*]] = getelementptr inbounds i32, i32* [[B]], i64 [[INDVARS_IV]]
+; CHECK-NEXT: [[TMP23:%.*]] = load i32, i32* [[ARRAYIDX4]], align 4
+; CHECK-NEXT: [[CMP5:%.*]] = icmp sgt i32 [[TMP22]], [[TMP23]]
+; CHECK-NEXT: br i1 [[CMP5]], label [[IF_THEN:%.*]], label [[IF_END]]
+; CHECK: if.then:
+; CHECK-NEXT: [[MUL:%.*]] = mul i32 [[TMP22]], 5
+; CHECK-NEXT: [[ADD:%.*]] = add i32 [[MUL]], 3
+; CHECK-NEXT: br label [[IF_END]]
+; CHECK: if.end:
+; CHECK-NEXT: [[K_0:%.*]] = phi i32 [ [[ADD]], [[IF_THEN]] ], [ [[TMP22]], [[FOR_BODY]] ]
+; CHECK-NEXT: store i32 [[K_0]], i32* [[ARRAYIDX]], align 4
+; CHECK-NEXT: [[INDVARS_IV_NEXT]] = add i64 [[INDVARS_IV]], 1
+; CHECK-NEXT: [[TMP24:%.*]] = trunc i64 [[INDVARS_IV_NEXT]] to i32
+; CHECK-NEXT: [[CMP:%.*]] = icmp slt i32 [[TMP24]], [[END]]
+; CHECK-NEXT: br i1 [[CMP]], label [[FOR_BODY]], label [[FOR_END_LOOPEXIT]], !llvm.loop !8
+; CHECK: for.end.loopexit:
+; CHECK-NEXT: br label [[FOR_END]]
+; CHECK: for.end:
+; CHECK-NEXT: ret i32 undef
+;
entry:
%cmp16 = icmp slt i32 %start, %end
br i1 %cmp16, label %for.body.lr.ph, label %for.end
@@ -69,13 +147,74 @@ for.end:
; return sum;
; }
-;CHECK-LABEL: @reduction_func(
-;CHECK: load <4 x i32>
-;CHECK: icmp slt <4 x i32>
-;CHECK: add <4 x i32>
-;CHECK: select <4 x i1>
-;CHECK: ret i32
define i32 @reduction_func(i32* nocapture %A, i32 %n) nounwind uwtable readonly ssp {
+; CHECK-LABEL: @reduction_func(
+; CHECK-NEXT: entry:
+; CHECK-NEXT: [[CMP10:%.*]] = icmp sgt i32 [[N:%.*]], 0
+; CHECK-NEXT: br i1 [[CMP10]], label [[FOR_BODY_PREHEADER:%.*]], label [[FOR_END:%.*]]
+; CHECK: for.body.preheader:
+; CHECK-NEXT: [[TMP0:%.*]] = add i32 [[N]], -1
+; CHECK-NEXT: [[TMP1:%.*]] = zext i32 [[TMP0]] to i64
+; CHECK-NEXT: [[TMP2:%.*]] = add nuw nsw i64 [[TMP1]], 1
+; CHECK-NEXT: [[MIN_ITERS_CHECK:%.*]] = icmp ult i64 [[TMP2]], 4
+; CHECK-NEXT: br i1 [[MIN_ITERS_CHECK]], label [[SCALAR_PH:%.*]], label [[MIN_ITERS_CHECKED:%.*]]
+; CHECK: min.iters.checked:
+; CHECK-NEXT: [[TMP3:%.*]] = and i32 [[N]], 3
+; CHECK-NEXT: [[N_MOD_VF:%.*]] = zext i32 [[TMP3]] to i64
+; CHECK-NEXT: [[N_VEC:%.*]] = sub nsw i64 [[TMP2]], [[N_MOD_VF]]
+; CHECK-NEXT: [[CMP_ZERO:%.*]] = icmp eq i64 [[N_VEC]], 0
+; CHECK-NEXT: br i1 [[CMP_ZERO]], label [[SCALAR_PH]], label [[VECTOR_PH:%.*]]
+; CHECK: vector.ph:
+; CHECK-NEXT: br label [[VECTOR_BODY:%.*]]
+; CHECK: vector.body:
+; CHECK-NEXT: [[INDEX:%.*]] = phi i64 [ 0, [[VECTOR_PH]] ], [ [[INDEX_NEXT:%.*]], [[VECTOR_BODY]] ]
+; CHECK-NEXT: [[VEC_PHI:%.*]] = phi <4 x i32> [ zeroinitializer, [[VECTOR_PH]] ], [ [[PREDPHI:%.*]], [[VECTOR_BODY]] ]
+; CHECK-NEXT: [[TMP4:%.*]] = getelementptr inbounds i32, i32* [[A:%.*]], i64 [[INDEX]]
+; CHECK-NEXT: [[TMP5:%.*]] = bitcast i32* [[TMP4]] to <4 x i32>*
+; CHECK-NEXT: [[WIDE_LOAD:%.*]] = load <4 x i32>, <4 x i32>* [[TMP5]], align 4
+; CHECK-NEXT: [[TMP6:%.*]] = add <4 x i32> [[VEC_PHI]], <i32 2, i32 2, i32 2, i32 2>
+; CHECK-NEXT: [[TMP7:%.*]] = add <4 x i32> [[TMP6]], [[WIDE_LOAD]]
+; CHECK-NEXT: [[TMP8:%.*]] = icmp slt <4 x i32> [[WIDE_LOAD]], <i32 31, i32 31, i32 31, i32 31>
+; CHECK-NEXT: [[PREDPHI]] = select <4 x i1> [[TMP8]], <4 x i32> [[VEC_PHI]], <4 x i32> [[TMP7]]
+; CHECK-NEXT: [[INDEX_NEXT]] = add i64 [[INDEX]], 4
+; CHECK-NEXT: [[TMP9:%.*]] = icmp eq i64 [[INDEX_NEXT]], [[N_VEC]]
+; CHECK-NEXT: br i1 [[TMP9]], label [[MIDDLE_BLOCK:%.*]], label [[VECTOR_BODY]], !llvm.loop !9
+; CHECK: middle.block:
+; CHECK-NEXT: [[RDX_SHUF:%.*]] = shufflevector <4 x i32> [[PREDPHI]], <4 x i32> undef, <4 x i32> <i32 2, i32 3, i32 undef, i32 undef>
+; CHECK-NEXT: [[BIN_RDX:%.*]] = add <4 x i32> [[PREDPHI]], [[RDX_SHUF]]
+; CHECK-NEXT: [[RDX_SHUF1:%.*]] = shufflevector <4 x i32> [[BIN_RDX]], <4 x i32> undef, <4 x i32> <i32 1, i32 undef, i32 undef, i32 undef>
+; CHECK-NEXT: [[BIN_RDX2:%.*]] = add <4 x i32> [[BIN_RDX]], [[RDX_SHUF1]]
+; CHECK-NEXT: [[TMP10:%.*]] = extractelement <4 x i32> [[BIN_RDX2]], i32 0
+; CHECK-NEXT: [[CMP_N:%.*]] = icmp eq i32 [[TMP3]], 0
+; CHECK-NEXT: br i1 [[CMP_N]], label [[FOR_END_LOOPEXIT:%.*]], label [[SCALAR_PH]]
+; CHECK: scalar.ph:
+; CHECK-NEXT: [[BC_RESUME_VAL:%.*]] = phi i64 [ [[N_VEC]], [[MIDDLE_BLOCK]] ], [ 0, [[FOR_BODY_PREHEADER]] ], [ 0, [[MIN_ITERS_CHECKED]] ]
+; CHECK-NEXT: [[BC_MERGE_RDX:%.*]] = phi i32 [ [[TMP10]], [[MIDDLE_BLOCK]] ], [ 0, [[FOR_BODY_PREHEADER]] ], [ 0, [[MIN_ITERS_CHECKED]] ]
+; CHECK-NEXT: br label [[FOR_BODY:%.*]]
+; CHECK: for.body:
+; CHECK-NEXT: [[INDVARS_IV:%.*]] = phi i64 [ [[INDVARS_IV_NEXT:%.*]], [[FOR_INC:%.*]] ], [ [[BC_RESUME_VAL]], [[SCALAR_PH]] ]
+; CHECK-NEXT: [[SUM_011:%.*]] = phi i32 [ [[SUM_1:%.*]], [[FOR_INC]] ], [ [[BC_MERGE_RDX]], [[SCALAR_PH]] ]
+; CHECK-NEXT: [[ARRAYIDX:%.*]] = getelementptr inbounds i32, i32* [[A]], i64 [[INDVARS_IV]]
+; CHECK-NEXT: [[TMP11:%.*]] = load i32, i32* [[ARRAYIDX]], align 4
+; CHECK-NEXT: [[CMP1:%.*]] = icmp sgt i32 [[TMP11]], 30
+; CHECK-NEXT: br i1 [[CMP1]], label [[IF_THEN:%.*]], label [[FOR_INC]]
+; CHECK: if.then:
+; CHECK-NEXT: [[ADD:%.*]] = add i32 [[SUM_011]], 2
+; CHECK-NEXT: [[ADD4:%.*]] = add i32 [[ADD]], [[TMP11]]
+; CHECK-NEXT: br label [[FOR_INC]]
+; CHECK: for.inc:
+; CHECK-NEXT: [[SUM_1]] = phi i32 [ [[ADD4]], [[IF_THEN]] ], [ [[SUM_011]], [[FOR_BODY]] ]
+; CHECK-NEXT: [[INDVARS_IV_NEXT]] = add i64 [[INDVARS_IV]], 1
+; CHECK-NEXT: [[LFTR_WIDEIV:%.*]] = trunc i64 [[INDVARS_IV_NEXT]] to i32
+; CHECK-NEXT: [[EXITCOND:%.*]] = icmp eq i32 [[LFTR_WIDEIV]], [[N]]
+; CHECK-NEXT: br i1 [[EXITCOND]], label [[FOR_END_LOOPEXIT]], label [[FOR_BODY]], !llvm.loop !10
+; CHECK: for.end.loopexit:
+; CHECK-NEXT: [[SUM_1_LCSSA:%.*]] = phi i32 [ [[SUM_1]], [[FOR_INC]] ], [ [[TMP10]], [[MIDDLE_BLOCK]] ]
+; CHECK-NEXT: br label [[FOR_END]]
+; CHECK: for.end:
+; CHECK-NEXT: [[SUM_0_LCSSA:%.*]] = phi i32 [ 0, [[ENTRY:%.*]] ], [ [[SUM_1_LCSSA]], [[FOR_END_LOOPEXIT]] ]
+; CHECK-NEXT: ret i32 [[SUM_0_LCSSA]]
+;
entry:
%cmp10 = icmp sgt i32 %n, 0
br i1 %cmp10, label %for.body, label %for.end
@@ -112,10 +251,32 @@ for.end: ; preds = %for.inc, %entry
; constant expression.
; PR16729
-; CHECK-LABEL: trapping_constant_expression
-; CHECK-NOT: or <4 x i32>
-
define i32 @trapping_constant_expression() {
+; CHECK-LABEL: @trapping_constant_expression(
+; CHECK-NEXT: entry:
+; CHECK-NEXT: br i1 false, label [[SCALAR_PH:%.*]], label [[MIN_ITERS_CHECKED:%.*]]
+; CHECK: min.iters.checked:
+; CHECK-NEXT: br i1 false, label [[SCALAR_PH]], label [[VECTOR_PH:%.*]]
+; CHECK: vector.ph:
+; CHECK-NEXT: br label [[VECTOR_BODY:%.*]]
+; CHECK: vector.body:
+; CHECK-NEXT: [[INDEX:%.*]] = phi i32 [ 0, [[VECTOR_PH]] ], [ [[INDEX_NEXT:%.*]], [[VECTOR_BODY]] ]
+; CHECK-NEXT: [[INDEX_NEXT]] = add i32 [[INDEX]], 4
+; CHECK-NEXT: [[TMP0:%.*]] = icmp eq i32 [[INDEX_NEXT]], 128
+; CHECK-NEXT: br i1 [[TMP0]], label [[MIDDLE_BLOCK:%.*]], label [[VECTOR_BODY]], !llvm.loop !12
+; CHECK: middle.block:
+; CHECK-NEXT: br i1 true, label [[FOR_END:%.*]], label [[SCALAR_PH]]
+; CHECK: scalar.ph:
+; CHECK-NEXT: br label [[FOR_BODY:%.*]]
+; CHECK: for.body:
+; CHECK-NEXT: br i1 false, label [[COND_FALSE:%.*]], label [[COND_END:%.*]]
+; CHECK: cond.false:
+; CHECK-NEXT: br label [[COND_END]]
+; CHECK: cond.end:
+; CHECK-NEXT: br i1 undef, label [[FOR_BODY]], label [[FOR_END]], !llvm.loop !13
+; CHECK: for.end:
+; CHECK-NEXT: ret i32 0
+;
entry:
br label %for.body
@@ -142,10 +303,24 @@ for.end:
; trapping constant expression.
; PR16729
-; CHECK-LABEL: trapping_constant_expression2
-; CHECK-NOT: or <4 x i32>
-
define i32 @trapping_constant_expression2() {
+; CHECK-LABEL: @trapping_constant_expression2(
+; CHECK-NEXT: entry:
+; CHECK-NEXT: br label [[FOR_BODY:%.*]]
+; CHECK: for.body:
+; CHECK-NEXT: [[INC3:%.*]] = phi i32 [ 0, [[ENTRY:%.*]] ], [ [[INC:%.*]], [[COND_END:%.*]] ]
+; CHECK-NEXT: [[OR2:%.*]] = phi i32 [ 0, [[ENTRY]] ], [ [[OR:%.*]], [[COND_END]] ]
+; CHECK-NEXT: br i1 false, label [[COND_FALSE:%.*]], label [[COND_END]]
+; CHECK: cond.false:
+; CHECK-NEXT: br label [[COND_END]]
+; CHECK: cond.end:
+; CHECK-NEXT: [[OR]] = or i32 [[OR2]], [[INC3]]
+; CHECK-NEXT: [[INC]] = add nsw i32 [[INC3]], 1
+; CHECK-NEXT: [[CMP:%.*]] = icmp slt i32 [[INC3]], 127
+; CHECK-NEXT: br i1 [[CMP]], label [[FOR_BODY]], label [[FOR_END:%.*]]
+; CHECK: for.end:
+; CHECK-NEXT: ret i32 [[OR]]
+;
entry:
br label %for.body