diff options
author | Philip Reames <listmail@philipreames.com> | 2019-10-01 17:03:44 +0000 |
---|---|---|
committer | Philip Reames <listmail@philipreames.com> | 2019-10-01 17:03:44 +0000 |
commit | 0200626f0bfe088ab65cfe69cb8f2647e71d08b3 (patch) | |
tree | 68acf9777c5844ef0488acfcb44ace6d8c334124 /llvm/test/Transforms/IndVarSimplify | |
parent | 9dba603748a8e7420cf8352f0e17277ac8eedebe (diff) | |
download | bcm5719-llvm-0200626f0bfe088ab65cfe69cb8f2647e71d08b3.tar.gz bcm5719-llvm-0200626f0bfe088ab65cfe69cb8f2647e71d08b3.zip |
[IndVars] An implementation of loop predication without a need for speculation
This patch implements a variation of a well known techniques for JIT compilers - we have an implementation in tree as LoopPredication - but with an interesting twist. This version does not assume the ability to execute a path which wasn't taken in the original program (such as a guard or widenable.condition intrinsic). The benefit is that this works for arbitrary IR from any frontend (including C/C++/Fortran). The tradeoff is that it's restricted to read only loops without implicit exits.
This builds on SCEV, and can thus eliminate the loop varying portion of the any early exit where all exits are understandable by SCEV. A key advantage is that fixing deficiency exposed in SCEV - already found one while writing test cases - will also benefit all of full redundancy elimination (and most other loop transforms).
I haven't seen anything in the literature which quite matches this. Given that, I'm not entirely sure that keeping the name "loop predication" is helpful. Anyone have suggestions for a better name? This is analogous to partial redundancy elimination - since we remove the condition flowing around the backedge - and has some parallels to our existing transforms which try to make conditions invariant in loops.
Factoring wise, I chose to put this in IndVarSimplify since it's a generally applicable to all workloads. I could split this off into it's own pass, but we'd then probably want to add that new pass every place we use IndVars. One solid argument for splitting it off into it's own pass is that this transform is "too good". It breaks a huge number of existing IndVars test cases as they tend to be simple read only loops. At the moment, I've opted it off by default, but if we add this to IndVars and enable, we'll have to update around 20 test files to add side effects or disable this transform.
Near term plan is to fuzz this extensively while off by default, reflect and discuss on the factoring issue mentioned just above, and then enable by default. I also need to give some though to supporting widenable conditions in this framing.
Differential Revision: https://reviews.llvm.org/D67408
llvm-svn: 373351
Diffstat (limited to 'llvm/test/Transforms/IndVarSimplify')
-rw-r--r-- | llvm/test/Transforms/IndVarSimplify/loop-predication.ll | 790 |
1 files changed, 790 insertions, 0 deletions
diff --git a/llvm/test/Transforms/IndVarSimplify/loop-predication.ll b/llvm/test/Transforms/IndVarSimplify/loop-predication.ll new file mode 100644 index 00000000000..c5fe11a3446 --- /dev/null +++ b/llvm/test/Transforms/IndVarSimplify/loop-predication.ll @@ -0,0 +1,790 @@ +; NOTE: Assertions have been autogenerated by utils/update_test_checks.py +; RUN: opt < %s -indvars -indvars-predicate-loops=1 -S | FileCheck %s + +declare void @prevent_merging() + +; Base case +define i32 @test1(i32* %array, i32 %length, i32 %n) { +; CHECK-LABEL: @test1( +; CHECK-NEXT: loop.preheader: +; CHECK-NEXT: [[TMP0:%.*]] = icmp ugt i32 [[N:%.*]], 1 +; CHECK-NEXT: [[UMAX:%.*]] = select i1 [[TMP0]], i32 [[N]], i32 1 +; CHECK-NEXT: [[TMP1:%.*]] = add i32 [[UMAX]], -1 +; CHECK-NEXT: [[TMP2:%.*]] = icmp ult i32 [[LENGTH:%.*]], [[TMP1]] +; CHECK-NEXT: [[UMIN:%.*]] = select i1 [[TMP2]], i32 [[LENGTH]], i32 [[TMP1]] +; CHECK-NEXT: [[TMP3:%.*]] = icmp ne i32 [[LENGTH]], [[UMIN]] +; CHECK-NEXT: br label [[LOOP:%.*]] +; CHECK: loop: +; CHECK-NEXT: [[LOOP_ACC:%.*]] = phi i32 [ [[LOOP_ACC_NEXT:%.*]], [[GUARDED:%.*]] ], [ 0, [[LOOP_PREHEADER:%.*]] ] +; CHECK-NEXT: [[I:%.*]] = phi i32 [ [[I_NEXT:%.*]], [[GUARDED]] ], [ 0, [[LOOP_PREHEADER]] ] +; CHECK-NEXT: br i1 [[TMP3]], label [[GUARDED]], label [[DEOPT:%.*]], !prof !0 +; CHECK: deopt: +; CHECK-NEXT: call void @prevent_merging() +; CHECK-NEXT: ret i32 -1 +; CHECK: guarded: +; CHECK-NEXT: [[I_I64:%.*]] = zext i32 [[I]] to i64 +; CHECK-NEXT: [[ARRAY_I_PTR:%.*]] = getelementptr inbounds i32, i32* [[ARRAY:%.*]], i64 [[I_I64]] +; CHECK-NEXT: [[ARRAY_I:%.*]] = load i32, i32* [[ARRAY_I_PTR]], align 4 +; CHECK-NEXT: [[LOOP_ACC_NEXT]] = add i32 [[LOOP_ACC]], [[ARRAY_I]] +; CHECK-NEXT: [[I_NEXT]] = add nuw i32 [[I]], 1 +; CHECK-NEXT: [[CONTINUE:%.*]] = icmp ult i32 [[I_NEXT]], [[N]] +; CHECK-NEXT: br i1 [[CONTINUE]], label [[LOOP]], label [[EXIT:%.*]] +; CHECK: exit: +; CHECK-NEXT: [[RESULT:%.*]] = phi i32 [ [[LOOP_ACC_NEXT]], [[GUARDED]] ] +; CHECK-NEXT: ret i32 [[RESULT]] +; +loop.preheader: ; preds = %entry + br label %loop + +loop: ; preds = %guarded, %loop.preheader + %loop.acc = phi i32 [ %loop.acc.next, %guarded ], [ 0, %loop.preheader ] + %i = phi i32 [ %i.next, %guarded ], [ 0, %loop.preheader ] + %within.bounds = icmp ult i32 %i, %length + br i1 %within.bounds, label %guarded, label %deopt, !prof !0 + +deopt: ; preds = %loop + call void @prevent_merging() + ret i32 -1 + +guarded: ; preds = %loop + %i.i64 = zext i32 %i to i64 + %array.i.ptr = getelementptr inbounds i32, i32* %array, i64 %i.i64 + %array.i = load i32, i32* %array.i.ptr, align 4 + %loop.acc.next = add i32 %loop.acc, %array.i + %i.next = add nuw i32 %i, 1 + %continue = icmp ult i32 %i.next, %n + br i1 %continue, label %loop, label %exit + +exit: ; preds = %guarded, %entry + %result = phi i32 [ %loop.acc.next, %guarded ] + ret i32 %result +} + +; Has side effect which must be reflected +define i32 @neg_store(i32* %array, i32 %length, i32 %n) { +; CHECK-LABEL: @neg_store( +; CHECK-NEXT: loop.preheader: +; CHECK-NEXT: br label [[LOOP:%.*]] +; CHECK: loop: +; CHECK-NEXT: [[LOOP_ACC:%.*]] = phi i32 [ [[LOOP_ACC_NEXT:%.*]], [[GUARDED:%.*]] ], [ 0, [[LOOP_PREHEADER:%.*]] ] +; CHECK-NEXT: [[I:%.*]] = phi i32 [ [[I_NEXT:%.*]], [[GUARDED]] ], [ 0, [[LOOP_PREHEADER]] ] +; CHECK-NEXT: [[WITHIN_BOUNDS:%.*]] = icmp ult i32 [[I]], [[LENGTH:%.*]] +; CHECK-NEXT: br i1 [[WITHIN_BOUNDS]], label [[GUARDED]], label [[DEOPT:%.*]], !prof !0 +; CHECK: deopt: +; CHECK-NEXT: call void @prevent_merging() +; CHECK-NEXT: ret i32 -1 +; CHECK: guarded: +; CHECK-NEXT: [[I_I64:%.*]] = zext i32 [[I]] to i64 +; CHECK-NEXT: [[ARRAY_I_PTR:%.*]] = getelementptr inbounds i32, i32* [[ARRAY:%.*]], i64 [[I_I64]] +; CHECK-NEXT: [[ARRAY_I:%.*]] = load i32, i32* [[ARRAY_I_PTR]], align 4 +; CHECK-NEXT: [[LOOP_ACC_NEXT]] = add i32 [[LOOP_ACC]], [[ARRAY_I]] +; CHECK-NEXT: store i32 0, i32* [[ARRAY_I_PTR]] +; CHECK-NEXT: [[I_NEXT]] = add nuw i32 [[I]], 1 +; CHECK-NEXT: [[CONTINUE:%.*]] = icmp ult i32 [[I_NEXT]], [[N:%.*]] +; CHECK-NEXT: br i1 [[CONTINUE]], label [[LOOP]], label [[EXIT:%.*]] +; CHECK: exit: +; CHECK-NEXT: [[RESULT:%.*]] = phi i32 [ [[LOOP_ACC_NEXT]], [[GUARDED]] ] +; CHECK-NEXT: ret i32 [[RESULT]] +; +loop.preheader: ; preds = %entry + br label %loop + +loop: ; preds = %guarded, %loop.preheader + %loop.acc = phi i32 [ %loop.acc.next, %guarded ], [ 0, %loop.preheader ] + %i = phi i32 [ %i.next, %guarded ], [ 0, %loop.preheader ] + %within.bounds = icmp ult i32 %i, %length + br i1 %within.bounds, label %guarded, label %deopt, !prof !0 + +deopt: ; preds = %loop + call void @prevent_merging() + ret i32 -1 + +guarded: ; preds = %loop + %i.i64 = zext i32 %i to i64 + %array.i.ptr = getelementptr inbounds i32, i32* %array, i64 %i.i64 + %array.i = load i32, i32* %array.i.ptr, align 4 + %loop.acc.next = add i32 %loop.acc, %array.i + store i32 0, i32* %array.i.ptr + %i.next = add nuw i32 %i, 1 + %continue = icmp ult i32 %i.next, %n + br i1 %continue, label %loop, label %exit + +exit: ; preds = %guarded, %entry + %result = phi i32 [ %loop.acc.next, %guarded ] + ret i32 %result +} + +declare void @maythrow() + +; May exit through implicit exception edge +define i32 @neg_implicit_exit(i32* %array, i32 %length, i32 %n) { +; CHECK-LABEL: @neg_implicit_exit( +; CHECK-NEXT: loop.preheader: +; CHECK-NEXT: br label [[LOOP:%.*]] +; CHECK: loop: +; CHECK-NEXT: [[LOOP_ACC:%.*]] = phi i32 [ [[LOOP_ACC_NEXT:%.*]], [[GUARDED:%.*]] ], [ 0, [[LOOP_PREHEADER:%.*]] ] +; CHECK-NEXT: [[I:%.*]] = phi i32 [ [[I_NEXT:%.*]], [[GUARDED]] ], [ 0, [[LOOP_PREHEADER]] ] +; CHECK-NEXT: call void @maythrow() +; CHECK-NEXT: [[WITHIN_BOUNDS:%.*]] = icmp ult i32 [[I]], [[LENGTH:%.*]] +; CHECK-NEXT: br i1 [[WITHIN_BOUNDS]], label [[GUARDED]], label [[DEOPT:%.*]], !prof !0 +; CHECK: deopt: +; CHECK-NEXT: call void @prevent_merging() +; CHECK-NEXT: ret i32 -1 +; CHECK: guarded: +; CHECK-NEXT: [[I_I64:%.*]] = zext i32 [[I]] to i64 +; CHECK-NEXT: [[ARRAY_I_PTR:%.*]] = getelementptr inbounds i32, i32* [[ARRAY:%.*]], i64 [[I_I64]] +; CHECK-NEXT: [[ARRAY_I:%.*]] = load i32, i32* [[ARRAY_I_PTR]], align 4 +; CHECK-NEXT: [[LOOP_ACC_NEXT]] = add i32 [[LOOP_ACC]], [[ARRAY_I]] +; CHECK-NEXT: [[I_NEXT]] = add nuw i32 [[I]], 1 +; CHECK-NEXT: [[CONTINUE:%.*]] = icmp ult i32 [[I_NEXT]], [[N:%.*]] +; CHECK-NEXT: br i1 [[CONTINUE]], label [[LOOP]], label [[EXIT:%.*]] +; CHECK: exit: +; CHECK-NEXT: [[RESULT:%.*]] = phi i32 [ [[LOOP_ACC_NEXT]], [[GUARDED]] ] +; CHECK-NEXT: ret i32 [[RESULT]] +; +loop.preheader: ; preds = %entry + br label %loop + +loop: ; preds = %guarded, %loop.preheader + %loop.acc = phi i32 [ %loop.acc.next, %guarded ], [ 0, %loop.preheader ] + %i = phi i32 [ %i.next, %guarded ], [ 0, %loop.preheader ] + call void @maythrow() + %within.bounds = icmp ult i32 %i, %length + br i1 %within.bounds, label %guarded, label %deopt, !prof !0 + +deopt: ; preds = %loop + call void @prevent_merging() + ret i32 -1 + +guarded: ; preds = %loop + %i.i64 = zext i32 %i to i64 + %array.i.ptr = getelementptr inbounds i32, i32* %array, i64 %i.i64 + %array.i = load i32, i32* %array.i.ptr, align 4 + %loop.acc.next = add i32 %loop.acc, %array.i + %i.next = add nuw i32 %i, 1 + %continue = icmp ult i32 %i.next, %n + br i1 %continue, label %loop, label %exit + +exit: ; preds = %guarded, %entry + %result = phi i32 [ %loop.acc.next, %guarded ] + ret i32 %result +} + + + +; Base case, but in LFTR form (just for sanity checking) +define i32 @test2(i32* %array, i32 %length, i32 %n) { +; CHECK-LABEL: @test2( +; CHECK-NEXT: loop.preheader: +; CHECK-NEXT: [[TMP0:%.*]] = add i32 [[N:%.*]], -1 +; CHECK-NEXT: [[TMP1:%.*]] = icmp ult i32 [[LENGTH:%.*]], [[TMP0]] +; CHECK-NEXT: [[UMIN:%.*]] = select i1 [[TMP1]], i32 [[LENGTH]], i32 [[TMP0]] +; CHECK-NEXT: [[TMP2:%.*]] = icmp ne i32 [[LENGTH]], [[UMIN]] +; CHECK-NEXT: br label [[LOOP:%.*]] +; CHECK: loop: +; CHECK-NEXT: [[LOOP_ACC:%.*]] = phi i32 [ [[LOOP_ACC_NEXT:%.*]], [[GUARDED:%.*]] ], [ 0, [[LOOP_PREHEADER:%.*]] ] +; CHECK-NEXT: [[I:%.*]] = phi i32 [ [[I_NEXT:%.*]], [[GUARDED]] ], [ 0, [[LOOP_PREHEADER]] ] +; CHECK-NEXT: br i1 [[TMP2]], label [[GUARDED]], label [[DEOPT:%.*]], !prof !0 +; CHECK: deopt: +; CHECK-NEXT: call void @prevent_merging() +; CHECK-NEXT: ret i32 -1 +; CHECK: guarded: +; CHECK-NEXT: [[I_I64:%.*]] = zext i32 [[I]] to i64 +; CHECK-NEXT: [[ARRAY_I_PTR:%.*]] = getelementptr inbounds i32, i32* [[ARRAY:%.*]], i64 [[I_I64]] +; CHECK-NEXT: [[ARRAY_I:%.*]] = load i32, i32* [[ARRAY_I_PTR]], align 4 +; CHECK-NEXT: [[LOOP_ACC_NEXT]] = add i32 [[LOOP_ACC]], [[ARRAY_I]] +; CHECK-NEXT: [[I_NEXT]] = add nuw i32 [[I]], 1 +; CHECK-NEXT: [[CONTINUE:%.*]] = icmp ne i32 [[I_NEXT]], [[N]] +; CHECK-NEXT: br i1 [[CONTINUE]], label [[LOOP]], label [[EXIT:%.*]] +; CHECK: exit: +; CHECK-NEXT: [[RESULT:%.*]] = phi i32 [ [[LOOP_ACC_NEXT]], [[GUARDED]] ] +; CHECK-NEXT: ret i32 [[RESULT]] +; +loop.preheader: ; preds = %entry + br label %loop + +loop: ; preds = %guarded, %loop.preheader + %loop.acc = phi i32 [ %loop.acc.next, %guarded ], [ 0, %loop.preheader ] + %i = phi i32 [ %i.next, %guarded ], [ 0, %loop.preheader ] + %within.bounds = icmp ne i32 %i, %length + br i1 %within.bounds, label %guarded, label %deopt, !prof !0 + +deopt: ; preds = %loop + call void @prevent_merging() + ret i32 -1 + +guarded: ; preds = %loop + %i.i64 = zext i32 %i to i64 + %array.i.ptr = getelementptr inbounds i32, i32* %array, i64 %i.i64 + %array.i = load i32, i32* %array.i.ptr, align 4 + %loop.acc.next = add i32 %loop.acc, %array.i + %i.next = add nuw i32 %i, 1 + %continue = icmp ne i32 %i.next, %n + br i1 %continue, label %loop, label %exit + +exit: ; preds = %guarded, %entry + %result = phi i32 [ %loop.acc.next, %guarded ] + ret i32 %result +} + +; br (and rcheck1, rcheck2) +define i32 @two_range_checks(i32* %array.1, i32 %length.1, i32* %array.2, i32 %length.2, i32 %n) { +; CHECK-LABEL: @two_range_checks( +; CHECK-NEXT: loop.preheader: +; CHECK-NEXT: [[TMP0:%.*]] = icmp ult i32 [[LENGTH_2:%.*]], [[LENGTH_1:%.*]] +; CHECK-NEXT: [[UMIN:%.*]] = select i1 [[TMP0]], i32 [[LENGTH_2]], i32 [[LENGTH_1]] +; CHECK-NEXT: [[TMP1:%.*]] = icmp ult i32 [[LENGTH_2]], [[LENGTH_1]] +; CHECK-NEXT: [[UMIN1:%.*]] = select i1 [[TMP1]], i32 [[LENGTH_2]], i32 [[LENGTH_1]] +; CHECK-NEXT: [[TMP2:%.*]] = icmp ugt i32 [[N:%.*]], 1 +; CHECK-NEXT: [[UMAX:%.*]] = select i1 [[TMP2]], i32 [[N]], i32 1 +; CHECK-NEXT: [[TMP3:%.*]] = add i32 [[UMAX]], -1 +; CHECK-NEXT: [[TMP4:%.*]] = icmp ult i32 [[UMIN1]], [[TMP3]] +; CHECK-NEXT: [[UMIN2:%.*]] = select i1 [[TMP4]], i32 [[UMIN1]], i32 [[TMP3]] +; CHECK-NEXT: [[TMP5:%.*]] = icmp ne i32 [[UMIN]], [[UMIN2]] +; CHECK-NEXT: br label [[LOOP:%.*]] +; CHECK: loop: +; CHECK-NEXT: [[LOOP_ACC:%.*]] = phi i32 [ [[LOOP_ACC_NEXT:%.*]], [[GUARDED:%.*]] ], [ 0, [[LOOP_PREHEADER:%.*]] ] +; CHECK-NEXT: [[I:%.*]] = phi i32 [ [[I_NEXT:%.*]], [[GUARDED]] ], [ 0, [[LOOP_PREHEADER]] ] +; CHECK-NEXT: br i1 [[TMP5]], label [[GUARDED]], label [[DEOPT:%.*]], !prof !0 +; CHECK: deopt: +; CHECK-NEXT: call void @prevent_merging() +; CHECK-NEXT: ret i32 -1 +; CHECK: guarded: +; CHECK-NEXT: [[I_I64:%.*]] = zext i32 [[I]] to i64 +; CHECK-NEXT: [[ARRAY_1_I_PTR:%.*]] = getelementptr inbounds i32, i32* [[ARRAY_1:%.*]], i64 [[I_I64]] +; CHECK-NEXT: [[ARRAY_1_I:%.*]] = load i32, i32* [[ARRAY_1_I_PTR]], align 4 +; CHECK-NEXT: [[LOOP_ACC_1:%.*]] = add i32 [[LOOP_ACC]], [[ARRAY_1_I]] +; CHECK-NEXT: [[ARRAY_2_I_PTR:%.*]] = getelementptr inbounds i32, i32* [[ARRAY_2:%.*]], i64 [[I_I64]] +; CHECK-NEXT: [[ARRAY_2_I:%.*]] = load i32, i32* [[ARRAY_2_I_PTR]], align 4 +; CHECK-NEXT: [[LOOP_ACC_NEXT]] = add i32 [[LOOP_ACC_1]], [[ARRAY_2_I]] +; CHECK-NEXT: [[I_NEXT]] = add nuw i32 [[I]], 1 +; CHECK-NEXT: [[CONTINUE:%.*]] = icmp ult i32 [[I_NEXT]], [[N]] +; CHECK-NEXT: br i1 [[CONTINUE]], label [[LOOP]], label [[EXIT:%.*]] +; CHECK: exit: +; CHECK-NEXT: [[RESULT:%.*]] = phi i32 [ [[LOOP_ACC_NEXT]], [[GUARDED]] ] +; CHECK-NEXT: ret i32 [[RESULT]] +; +loop.preheader: ; preds = %entry + br label %loop + +loop: ; preds = %guarded, %loop.preheader + %loop.acc = phi i32 [ %loop.acc.next, %guarded ], [ 0, %loop.preheader ] + %i = phi i32 [ %i.next, %guarded ], [ 0, %loop.preheader ] + %within.bounds.1 = icmp ult i32 %i, %length.1 + %within.bounds.2 = icmp ult i32 %i, %length.2 + %within.bounds = and i1 %within.bounds.1, %within.bounds.2 + br i1 %within.bounds, label %guarded, label %deopt, !prof !0 + +deopt: ; preds = %loop + call void @prevent_merging() + ret i32 -1 + +guarded: ; preds = %loop + %i.i64 = zext i32 %i to i64 + %array.1.i.ptr = getelementptr inbounds i32, i32* %array.1, i64 %i.i64 + %array.1.i = load i32, i32* %array.1.i.ptr, align 4 + %loop.acc.1 = add i32 %loop.acc, %array.1.i + %array.2.i.ptr = getelementptr inbounds i32, i32* %array.2, i64 %i.i64 + %array.2.i = load i32, i32* %array.2.i.ptr, align 4 + %loop.acc.next = add i32 %loop.acc.1, %array.2.i + %i.next = add nuw i32 %i, 1 + %continue = icmp ult i32 %i.next, %n + br i1 %continue, label %loop, label %exit + +exit: ; preds = %guarded, %entry + %result = phi i32 [ %loop.acc.next, %guarded ] + ret i32 %result +} + +define i32 @three_range_checks(i32* %array.1, i32 %length.1, i32* %array.2, i32 %length.2, i32* %array.3, i32 %length.3, i32 %n) { +; CHECK-LABEL: @three_range_checks( +; CHECK-NEXT: loop.preheader: +; CHECK-NEXT: [[TMP0:%.*]] = icmp ult i32 [[LENGTH_3:%.*]], [[LENGTH_2:%.*]] +; CHECK-NEXT: [[UMIN:%.*]] = select i1 [[TMP0]], i32 [[LENGTH_3]], i32 [[LENGTH_2]] +; CHECK-NEXT: [[TMP1:%.*]] = icmp ult i32 [[UMIN]], [[LENGTH_1:%.*]] +; CHECK-NEXT: [[UMIN1:%.*]] = select i1 [[TMP1]], i32 [[UMIN]], i32 [[LENGTH_1]] +; CHECK-NEXT: [[TMP2:%.*]] = icmp ult i32 [[LENGTH_3]], [[LENGTH_2]] +; CHECK-NEXT: [[UMIN2:%.*]] = select i1 [[TMP2]], i32 [[LENGTH_3]], i32 [[LENGTH_2]] +; CHECK-NEXT: [[TMP3:%.*]] = icmp ult i32 [[UMIN2]], [[LENGTH_1]] +; CHECK-NEXT: [[UMIN3:%.*]] = select i1 [[TMP3]], i32 [[UMIN2]], i32 [[LENGTH_1]] +; CHECK-NEXT: [[TMP4:%.*]] = icmp ugt i32 [[N:%.*]], 1 +; CHECK-NEXT: [[UMAX:%.*]] = select i1 [[TMP4]], i32 [[N]], i32 1 +; CHECK-NEXT: [[TMP5:%.*]] = add i32 [[UMAX]], -1 +; CHECK-NEXT: [[TMP6:%.*]] = icmp ult i32 [[UMIN3]], [[TMP5]] +; CHECK-NEXT: [[UMIN4:%.*]] = select i1 [[TMP6]], i32 [[UMIN3]], i32 [[TMP5]] +; CHECK-NEXT: [[TMP7:%.*]] = icmp ne i32 [[UMIN1]], [[UMIN4]] +; CHECK-NEXT: br label [[LOOP:%.*]] +; CHECK: loop: +; CHECK-NEXT: [[LOOP_ACC:%.*]] = phi i32 [ [[LOOP_ACC_NEXT:%.*]], [[GUARDED:%.*]] ], [ 0, [[LOOP_PREHEADER:%.*]] ] +; CHECK-NEXT: [[I:%.*]] = phi i32 [ [[I_NEXT:%.*]], [[GUARDED]] ], [ 0, [[LOOP_PREHEADER]] ] +; CHECK-NEXT: br i1 [[TMP7]], label [[GUARDED]], label [[DEOPT:%.*]], !prof !0 +; CHECK: deopt: +; CHECK-NEXT: call void @prevent_merging() +; CHECK-NEXT: ret i32 -1 +; CHECK: guarded: +; CHECK-NEXT: [[I_I64:%.*]] = zext i32 [[I]] to i64 +; CHECK-NEXT: [[ARRAY_1_I_PTR:%.*]] = getelementptr inbounds i32, i32* [[ARRAY_1:%.*]], i64 [[I_I64]] +; CHECK-NEXT: [[ARRAY_1_I:%.*]] = load i32, i32* [[ARRAY_1_I_PTR]], align 4 +; CHECK-NEXT: [[LOOP_ACC_1:%.*]] = add i32 [[LOOP_ACC]], [[ARRAY_1_I]] +; CHECK-NEXT: [[ARRAY_2_I_PTR:%.*]] = getelementptr inbounds i32, i32* [[ARRAY_2:%.*]], i64 [[I_I64]] +; CHECK-NEXT: [[ARRAY_2_I:%.*]] = load i32, i32* [[ARRAY_2_I_PTR]], align 4 +; CHECK-NEXT: [[LOOP_ACC_2:%.*]] = add i32 [[LOOP_ACC_1]], [[ARRAY_2_I]] +; CHECK-NEXT: [[ARRAY_3_I_PTR:%.*]] = getelementptr inbounds i32, i32* [[ARRAY_3:%.*]], i64 [[I_I64]] +; CHECK-NEXT: [[ARRAY_3_I:%.*]] = load i32, i32* [[ARRAY_3_I_PTR]], align 4 +; CHECK-NEXT: [[LOOP_ACC_NEXT]] = add i32 [[LOOP_ACC_2]], [[ARRAY_3_I]] +; CHECK-NEXT: [[I_NEXT]] = add nuw i32 [[I]], 1 +; CHECK-NEXT: [[CONTINUE:%.*]] = icmp ult i32 [[I_NEXT]], [[N]] +; CHECK-NEXT: br i1 [[CONTINUE]], label [[LOOP]], label [[EXIT:%.*]] +; CHECK: exit: +; CHECK-NEXT: [[RESULT:%.*]] = phi i32 [ [[LOOP_ACC_NEXT]], [[GUARDED]] ] +; CHECK-NEXT: ret i32 [[RESULT]] +; +loop.preheader: ; preds = %entry + br label %loop + +loop: ; preds = %guarded, %loop.preheader + %loop.acc = phi i32 [ %loop.acc.next, %guarded ], [ 0, %loop.preheader ] + %i = phi i32 [ %i.next, %guarded ], [ 0, %loop.preheader ] + %within.bounds.1 = icmp ult i32 %i, %length.1 + %within.bounds.2 = icmp ult i32 %i, %length.2 + %within.bounds.3 = icmp ult i32 %i, %length.3 + %within.bounds.1.and.2 = and i1 %within.bounds.1, %within.bounds.2 + %within.bounds = and i1 %within.bounds.1.and.2, %within.bounds.3 + br i1 %within.bounds, label %guarded, label %deopt, !prof !0 + +deopt: ; preds = %loop + call void @prevent_merging() + ret i32 -1 + +guarded: ; preds = %loop + %i.i64 = zext i32 %i to i64 + %array.1.i.ptr = getelementptr inbounds i32, i32* %array.1, i64 %i.i64 + %array.1.i = load i32, i32* %array.1.i.ptr, align 4 + %loop.acc.1 = add i32 %loop.acc, %array.1.i + %array.2.i.ptr = getelementptr inbounds i32, i32* %array.2, i64 %i.i64 + %array.2.i = load i32, i32* %array.2.i.ptr, align 4 + %loop.acc.2 = add i32 %loop.acc.1, %array.2.i + %array.3.i.ptr = getelementptr inbounds i32, i32* %array.3, i64 %i.i64 + %array.3.i = load i32, i32* %array.3.i.ptr, align 4 + %loop.acc.next = add i32 %loop.acc.2, %array.3.i + %i.next = add nuw i32 %i, 1 + %continue = icmp ult i32 %i.next, %n + br i1 %continue, label %loop, label %exit + +exit: ; preds = %guarded, %entry + %result = phi i32 [ %loop.acc.next, %guarded ] + ret i32 %result +} + +; Analogous to the above, but with two distinct branches (on different conditions) +define i32 @distinct_checks(i32* %array.1, i32 %length.1, i32* %array.2, i32 %length.2, i32* %array.3, i32 %length.3, i32 %n) { +; CHECK-LABEL: @distinct_checks( +; CHECK-NEXT: loop.preheader: +; CHECK-NEXT: [[TMP0:%.*]] = icmp ult i32 [[LENGTH_2:%.*]], [[LENGTH_1:%.*]] +; CHECK-NEXT: [[UMIN:%.*]] = select i1 [[TMP0]], i32 [[LENGTH_2]], i32 [[LENGTH_1]] +; CHECK-NEXT: [[TMP1:%.*]] = icmp ugt i32 [[N:%.*]], 1 +; CHECK-NEXT: [[UMAX:%.*]] = select i1 [[TMP1]], i32 [[N]], i32 1 +; CHECK-NEXT: [[TMP2:%.*]] = add i32 [[UMAX]], -1 +; CHECK-NEXT: [[TMP3:%.*]] = icmp ult i32 [[UMIN]], [[TMP2]] +; CHECK-NEXT: [[UMIN1:%.*]] = select i1 [[TMP3]], i32 [[UMIN]], i32 [[TMP2]] +; CHECK-NEXT: [[TMP4:%.*]] = icmp ne i32 [[LENGTH_1]], [[UMIN1]] +; CHECK-NEXT: [[TMP5:%.*]] = icmp ne i32 [[LENGTH_2]], [[UMIN1]] +; CHECK-NEXT: br label [[LOOP:%.*]] +; CHECK: loop: +; CHECK-NEXT: [[LOOP_ACC:%.*]] = phi i32 [ [[LOOP_ACC_NEXT:%.*]], [[GUARDED1:%.*]] ], [ 0, [[LOOP_PREHEADER:%.*]] ] +; CHECK-NEXT: [[I:%.*]] = phi i32 [ [[I_NEXT:%.*]], [[GUARDED1]] ], [ 0, [[LOOP_PREHEADER]] ] +; CHECK-NEXT: br i1 [[TMP4]], label [[GUARDED:%.*]], label [[DEOPT:%.*]], !prof !0 +; CHECK: deopt: +; CHECK-NEXT: call void @prevent_merging() +; CHECK-NEXT: ret i32 -1 +; CHECK: guarded: +; CHECK-NEXT: [[I_I64:%.*]] = zext i32 [[I]] to i64 +; CHECK-NEXT: [[ARRAY_1_I_PTR:%.*]] = getelementptr inbounds i32, i32* [[ARRAY_1:%.*]], i64 [[I_I64]] +; CHECK-NEXT: [[ARRAY_1_I:%.*]] = load i32, i32* [[ARRAY_1_I_PTR]], align 4 +; CHECK-NEXT: [[LOOP_ACC_1:%.*]] = add i32 [[LOOP_ACC]], [[ARRAY_1_I]] +; CHECK-NEXT: br i1 [[TMP5]], label [[GUARDED1]], label [[DEOPT2:%.*]], !prof !0 +; CHECK: deopt2: +; CHECK-NEXT: call void @prevent_merging() +; CHECK-NEXT: ret i32 -1 +; CHECK: guarded1: +; CHECK-NEXT: [[ARRAY_3_I_PTR:%.*]] = getelementptr inbounds i32, i32* [[ARRAY_3:%.*]], i64 [[I_I64]] +; CHECK-NEXT: [[ARRAY_3_I:%.*]] = load i32, i32* [[ARRAY_3_I_PTR]], align 4 +; CHECK-NEXT: [[LOOP_ACC_NEXT]] = add i32 [[LOOP_ACC_1]], [[ARRAY_3_I]] +; CHECK-NEXT: [[I_NEXT]] = add nuw i32 [[I]], 1 +; CHECK-NEXT: [[CONTINUE:%.*]] = icmp ult i32 [[I_NEXT]], [[N]] +; CHECK-NEXT: br i1 [[CONTINUE]], label [[LOOP]], label [[EXIT:%.*]] +; CHECK: exit: +; CHECK-NEXT: [[RESULT:%.*]] = phi i32 [ [[LOOP_ACC_NEXT]], [[GUARDED1]] ] +; CHECK-NEXT: ret i32 [[RESULT]] +; +loop.preheader: ; preds = %entry + br label %loop + +loop: ; preds = %guarded4, %loop.preheader + %loop.acc = phi i32 [ %loop.acc.next, %guarded1 ], [ 0, %loop.preheader ] + %i = phi i32 [ %i.next, %guarded1 ], [ 0, %loop.preheader ] + %within.bounds.1 = icmp ult i32 %i, %length.1 + br i1 %within.bounds.1, label %guarded, label %deopt, !prof !0 + +deopt: ; preds = %loop + call void @prevent_merging() + ret i32 -1 + +guarded: ; preds = %loop + %i.i64 = zext i32 %i to i64 + %array.1.i.ptr = getelementptr inbounds i32, i32* %array.1, i64 %i.i64 + %array.1.i = load i32, i32* %array.1.i.ptr, align 4 + %loop.acc.1 = add i32 %loop.acc, %array.1.i + %within.bounds.2 = icmp ult i32 %i, %length.2 + br i1 %within.bounds.2, label %guarded1, label %deopt2, !prof !0 + +deopt2: ; preds = %guarded + call void @prevent_merging() + ret i32 -1 + +guarded1: ; preds = %guarded1 + %array.3.i.ptr = getelementptr inbounds i32, i32* %array.3, i64 %i.i64 + %array.3.i = load i32, i32* %array.3.i.ptr, align 4 + %loop.acc.next = add i32 %loop.acc.1, %array.3.i + %i.next = add nuw i32 %i, 1 + %continue = icmp ult i32 %i.next, %n + br i1 %continue, label %loop, label %exit + +exit: + %result = phi i32 [ %loop.acc.next, %guarded1 ] + ret i32 %result +} + +define i32 @duplicate_checks(i32* %array.1, i32* %array.2, i32* %array.3, i32 %length, i32 %n) { +; CHECK-LABEL: @duplicate_checks( +; CHECK-NEXT: loop.preheader: +; CHECK-NEXT: [[TMP0:%.*]] = icmp ugt i32 [[N:%.*]], 1 +; CHECK-NEXT: [[UMAX:%.*]] = select i1 [[TMP0]], i32 [[N]], i32 1 +; CHECK-NEXT: [[TMP1:%.*]] = add i32 [[UMAX]], -1 +; CHECK-NEXT: [[TMP2:%.*]] = icmp ult i32 [[LENGTH:%.*]], [[TMP1]] +; CHECK-NEXT: [[UMIN:%.*]] = select i1 [[TMP2]], i32 [[LENGTH]], i32 [[TMP1]] +; CHECK-NEXT: [[TMP3:%.*]] = icmp ne i32 [[LENGTH]], [[UMIN]] +; CHECK-NEXT: [[TMP4:%.*]] = icmp ne i32 [[LENGTH]], [[UMIN]] +; CHECK-NEXT: br label [[LOOP:%.*]] +; CHECK: loop: +; CHECK-NEXT: [[LOOP_ACC:%.*]] = phi i32 [ [[LOOP_ACC_NEXT:%.*]], [[GUARDED1:%.*]] ], [ 0, [[LOOP_PREHEADER:%.*]] ] +; CHECK-NEXT: [[I:%.*]] = phi i32 [ [[I_NEXT:%.*]], [[GUARDED1]] ], [ 0, [[LOOP_PREHEADER]] ] +; CHECK-NEXT: br i1 [[TMP3]], label [[GUARDED:%.*]], label [[DEOPT:%.*]], !prof !0 +; CHECK: deopt: +; CHECK-NEXT: call void @prevent_merging() +; CHECK-NEXT: ret i32 -1 +; CHECK: guarded: +; CHECK-NEXT: [[I_I64:%.*]] = zext i32 [[I]] to i64 +; CHECK-NEXT: [[ARRAY_1_I_PTR:%.*]] = getelementptr inbounds i32, i32* [[ARRAY_1:%.*]], i64 [[I_I64]] +; CHECK-NEXT: [[ARRAY_1_I:%.*]] = load i32, i32* [[ARRAY_1_I_PTR]], align 4 +; CHECK-NEXT: [[LOOP_ACC_1:%.*]] = add i32 [[LOOP_ACC]], [[ARRAY_1_I]] +; CHECK-NEXT: br i1 [[TMP4]], label [[GUARDED1]], label [[DEOPT2:%.*]], !prof !0 +; CHECK: deopt2: +; CHECK-NEXT: call void @prevent_merging() +; CHECK-NEXT: ret i32 -1 +; CHECK: guarded1: +; CHECK-NEXT: [[ARRAY_3_I_PTR:%.*]] = getelementptr inbounds i32, i32* [[ARRAY_3:%.*]], i64 [[I_I64]] +; CHECK-NEXT: [[ARRAY_3_I:%.*]] = load i32, i32* [[ARRAY_3_I_PTR]], align 4 +; CHECK-NEXT: [[LOOP_ACC_NEXT]] = add i32 [[LOOP_ACC_1]], [[ARRAY_3_I]] +; CHECK-NEXT: [[I_NEXT]] = add nuw i32 [[I]], 1 +; CHECK-NEXT: [[CONTINUE:%.*]] = icmp ult i32 [[I_NEXT]], [[N]] +; CHECK-NEXT: br i1 [[CONTINUE]], label [[LOOP]], label [[EXIT:%.*]] +; CHECK: exit: +; CHECK-NEXT: [[RESULT:%.*]] = phi i32 [ [[LOOP_ACC_NEXT]], [[GUARDED1]] ] +; CHECK-NEXT: ret i32 [[RESULT]] +; +loop.preheader: ; preds = %entry + br label %loop + +loop: ; preds = %guarded4, %loop.preheader + %loop.acc = phi i32 [ %loop.acc.next, %guarded1 ], [ 0, %loop.preheader ] + %i = phi i32 [ %i.next, %guarded1 ], [ 0, %loop.preheader ] + %within.bounds.1 = icmp ult i32 %i, %length + br i1 %within.bounds.1, label %guarded, label %deopt, !prof !0 + +deopt: ; preds = %loop + call void @prevent_merging() + ret i32 -1 + +guarded: ; preds = %loop + %i.i64 = zext i32 %i to i64 + %array.1.i.ptr = getelementptr inbounds i32, i32* %array.1, i64 %i.i64 + %array.1.i = load i32, i32* %array.1.i.ptr, align 4 + %loop.acc.1 = add i32 %loop.acc, %array.1.i + %within.bounds.2 = icmp ult i32 %i, %length + br i1 %within.bounds.2, label %guarded1, label %deopt2, !prof !0 + +deopt2: ; preds = %guarded + call void @prevent_merging() + ret i32 -1 + +guarded1: ; preds = %guarded1 + %array.3.i.ptr = getelementptr inbounds i32, i32* %array.3, i64 %i.i64 + %array.3.i = load i32, i32* %array.3.i.ptr, align 4 + %loop.acc.next = add i32 %loop.acc.1, %array.3.i + %i.next = add nuw i32 %i, 1 + %continue = icmp ult i32 %i.next, %n + br i1 %continue, label %loop, label %exit + +exit: + %result = phi i32 [ %loop.acc.next, %guarded1 ] + ret i32 %result +} + + +define i32 @provably_taken(i32* %array, i32* %length.ptr) { +; CHECK-LABEL: @provably_taken( +; CHECK-NEXT: loop.preheader: +; CHECK-NEXT: br label [[LOOP:%.*]] +; CHECK: loop: +; CHECK-NEXT: [[LOOP_ACC:%.*]] = phi i32 [ [[LOOP_ACC_NEXT:%.*]], [[GUARDED:%.*]] ], [ 0, [[LOOP_PREHEADER:%.*]] ] +; CHECK-NEXT: [[I:%.*]] = phi i32 [ [[I_NEXT:%.*]], [[GUARDED]] ], [ 0, [[LOOP_PREHEADER]] ] +; CHECK-NEXT: br i1 false, label [[GUARDED]], label [[DEOPT:%.*]], !prof !0 +; CHECK: deopt: +; CHECK-NEXT: call void @prevent_merging() +; CHECK-NEXT: ret i32 -1 +; CHECK: guarded: +; CHECK-NEXT: [[I_I64:%.*]] = zext i32 [[I]] to i64 +; CHECK-NEXT: [[ARRAY_I_PTR:%.*]] = getelementptr inbounds i32, i32* [[ARRAY:%.*]], i64 [[I_I64]] +; CHECK-NEXT: [[ARRAY_I:%.*]] = load i32, i32* [[ARRAY_I_PTR]], align 4 +; CHECK-NEXT: [[LOOP_ACC_NEXT]] = add i32 [[LOOP_ACC]], [[ARRAY_I]] +; CHECK-NEXT: [[I_NEXT]] = add nuw nsw i32 [[I]], 1 +; CHECK-NEXT: br i1 true, label [[LOOP]], label [[EXIT:%.*]] +; CHECK: exit: +; CHECK-NEXT: [[RESULT:%.*]] = phi i32 [ [[LOOP_ACC_NEXT]], [[GUARDED]] ] +; CHECK-NEXT: ret i32 [[RESULT]] +; +loop.preheader: + %length = load i32, i32* %length.ptr, !range !2 + br label %loop + +loop: ; preds = %guarded, %loop.preheader + %loop.acc = phi i32 [ %loop.acc.next, %guarded ], [ 0, %loop.preheader ] + %i = phi i32 [ %i.next, %guarded ], [ 0, %loop.preheader ] + %within.bounds = icmp ult i32 %i, %length + br i1 %within.bounds, label %guarded, label %deopt, !prof !0 + +deopt: ; preds = %loop + call void @prevent_merging() + ret i32 -1 + +guarded: ; preds = %loop + %i.i64 = zext i32 %i to i64 + %array.i.ptr = getelementptr inbounds i32, i32* %array, i64 %i.i64 + %array.i = load i32, i32* %array.i.ptr, align 4 + %loop.acc.next = add i32 %loop.acc, %array.i + %i.next = add nuw i32 %i, 1 + %continue = icmp slt i32 %i.next, 200 + br i1 %continue, label %loop, label %exit + +exit: ; preds = %guarded + %result = phi i32 [ %loop.acc.next, %guarded ] + ret i32 %result +} + +; Non-latch exits can still be predicated +define i32 @unconditional_latch(i32* %a, i32 %length) { +; CHECK-LABEL: @unconditional_latch( +; CHECK-NEXT: loop.preheader: +; CHECK-NEXT: br label [[LOOP:%.*]] +; CHECK: loop: +; CHECK-NEXT: br i1 false, label [[GUARDED:%.*]], label [[DEOPT:%.*]], !prof !0 +; CHECK: deopt: +; CHECK-NEXT: call void @prevent_merging() +; CHECK-NEXT: ret i32 -1 +; CHECK: guarded: +; CHECK-NEXT: br label [[LOOP]] +; +loop.preheader: + br label %loop + +loop: ; preds = %guarded, %loop.preheader + %i = phi i32 [ %i.next, %guarded ], [ 400, %loop.preheader ] + %within.bounds = icmp ult i32 %i, %length + br i1 %within.bounds, label %guarded, label %deopt, !prof !0 + +deopt: ; preds = %loop + call void @prevent_merging() + ret i32 -1 + +guarded: ; preds = %loop + %i.next = add i32 %i, 1 + br label %loop +} + +; Side effect in loop must run proper number of times +define i32 @unconditional_latch_with_side_effect(i32* %a, i32 %length) { +; CHECK-LABEL: @unconditional_latch_with_side_effect( +; CHECK-NEXT: loop.preheader: +; CHECK-NEXT: br label [[LOOP:%.*]] +; CHECK: loop: +; CHECK-NEXT: [[I:%.*]] = phi i32 [ [[I_NEXT:%.*]], [[GUARDED:%.*]] ], [ 400, [[LOOP_PREHEADER:%.*]] ] +; CHECK-NEXT: [[WITHIN_BOUNDS:%.*]] = icmp ult i32 [[I]], [[LENGTH:%.*]] +; CHECK-NEXT: br i1 [[WITHIN_BOUNDS]], label [[GUARDED]], label [[DEOPT:%.*]], !prof !0 +; CHECK: deopt: +; CHECK-NEXT: call void @prevent_merging() +; CHECK-NEXT: ret i32 -1 +; CHECK: guarded: +; CHECK-NEXT: store volatile i32 0, i32* [[A:%.*]] +; CHECK-NEXT: [[I_NEXT]] = add i32 [[I]], 1 +; CHECK-NEXT: br label [[LOOP]] +; +loop.preheader: + br label %loop + +loop: ; preds = %guarded, %loop.preheader + %i = phi i32 [ %i.next, %guarded ], [ 400, %loop.preheader ] + %within.bounds = icmp ult i32 %i, %length + br i1 %within.bounds, label %guarded, label %deopt, !prof !0 + +deopt: ; preds = %loop + call void @prevent_merging() + ret i32 -1 + +guarded: ; preds = %loop + store volatile i32 0, i32* %a + %i.next = add i32 %i, 1 + br label %loop +} + +; Demonstrate that this approach works with IVs of different steps, and types +; This version uses a manually lftred exit condition to work around an issue described +; in detail on next test. +define i32 @different_ivs(i32* %array, i32 %length, i32 %n) { +; CHECK-LABEL: @different_ivs( +; CHECK-NEXT: loop.preheader: +; CHECK-NEXT: [[N64:%.*]] = zext i32 [[N:%.*]] to i64 +; CHECK-NEXT: [[TMP0:%.*]] = icmp ugt i64 [[N64]], 1 +; CHECK-NEXT: [[UMAX:%.*]] = select i1 [[TMP0]], i64 [[N64]], i64 1 +; CHECK-NEXT: [[TMP1:%.*]] = add nsw i64 [[UMAX]], -1 +; CHECK-NEXT: [[TMP2:%.*]] = zext i32 [[LENGTH:%.*]] to i64 +; CHECK-NEXT: [[TMP3:%.*]] = icmp ult i64 [[TMP1]], [[TMP2]] +; CHECK-NEXT: [[UMIN:%.*]] = select i1 [[TMP3]], i64 [[TMP1]], i64 [[TMP2]] +; CHECK-NEXT: [[TMP4:%.*]] = zext i32 [[LENGTH]] to i64 +; CHECK-NEXT: [[TMP5:%.*]] = icmp ne i64 [[TMP4]], [[UMIN]] +; CHECK-NEXT: br label [[LOOP:%.*]] +; CHECK: loop: +; CHECK-NEXT: [[LOOP_ACC:%.*]] = phi i32 [ [[LOOP_ACC_NEXT:%.*]], [[GUARDED:%.*]] ], [ 0, [[LOOP_PREHEADER:%.*]] ] +; CHECK-NEXT: [[I:%.*]] = phi i64 [ [[I_NEXT:%.*]], [[GUARDED]] ], [ 0, [[LOOP_PREHEADER]] ] +; CHECK-NEXT: br i1 [[TMP5]], label [[GUARDED]], label [[DEOPT:%.*]], !prof !0 +; CHECK: deopt: +; CHECK-NEXT: call void @prevent_merging() +; CHECK-NEXT: ret i32 -1 +; CHECK: guarded: +; CHECK-NEXT: [[ARRAY_I_PTR:%.*]] = getelementptr inbounds i32, i32* [[ARRAY:%.*]], i64 [[I]] +; CHECK-NEXT: [[ARRAY_I:%.*]] = load i32, i32* [[ARRAY_I_PTR]], align 4 +; CHECK-NEXT: [[LOOP_ACC_NEXT]] = add i32 [[LOOP_ACC]], [[ARRAY_I]] +; CHECK-NEXT: [[I_NEXT]] = add nuw nsw i64 [[I]], 1 +; CHECK-NEXT: [[CONTINUE:%.*]] = icmp ult i64 [[I_NEXT]], [[N64]] +; CHECK-NEXT: br i1 [[CONTINUE]], label [[LOOP]], label [[EXIT:%.*]] +; CHECK: exit: +; CHECK-NEXT: [[RESULT:%.*]] = phi i32 [ [[LOOP_ACC_NEXT]], [[GUARDED]] ] +; CHECK-NEXT: ret i32 [[RESULT]] +; +loop.preheader: + %j.start = sub nuw nsw i32 %length, 1 + %n64 = zext i32 %n to i64 + br label %loop + +loop: + %loop.acc = phi i32 [ %loop.acc.next, %guarded ], [ 0, %loop.preheader ] + %i = phi i64 [ %i.next, %guarded ], [ 0, %loop.preheader ] + %j = phi i32 [ %j.next, %guarded ], [ %j.start, %loop.preheader ] + %within.bounds = icmp ne i32 %j, -1 + br i1 %within.bounds, label %guarded, label %deopt, !prof !0 + +deopt: + call void @prevent_merging() + ret i32 -1 + +guarded: + %array.i.ptr = getelementptr inbounds i32, i32* %array, i64 %i + %array.i = load i32, i32* %array.i.ptr, align 4 + %loop.acc.next = add i32 %loop.acc, %array.i + %i.next = add nuw i64 %i, 1 + %j.next = sub nuw i32 %j, 1 + %continue = icmp ult i64 %i.next, %n64 + br i1 %continue, label %loop, label %exit + +exit: + %result = phi i32 [ %loop.acc.next, %guarded ] + ret i32 %result +} + +; TODO: We're failing to compute an exit count for the bounds check. +; From some quick analysis, it looks like we don't handle -1 step +; in howManyLessThans. Should be a simple fix. +define i32 @different_ivs2(i32* %array, i32 %length, i32 %n) { +; CHECK-LABEL: @different_ivs2( +; CHECK-NEXT: entry: +; CHECK-NEXT: [[POS_LENGTH:%.*]] = icmp sgt i32 [[LENGTH:%.*]], 0 +; CHECK-NEXT: br i1 [[POS_LENGTH]], label [[LOOP_PREHEADER:%.*]], label [[EXIT:%.*]] +; CHECK: loop.preheader: +; CHECK-NEXT: [[J_START:%.*]] = sub nuw nsw i32 [[LENGTH]], 1 +; CHECK-NEXT: [[N64:%.*]] = zext i32 [[N:%.*]] to i64 +; CHECK-NEXT: br label [[LOOP:%.*]] +; CHECK: loop: +; CHECK-NEXT: [[LOOP_ACC:%.*]] = phi i32 [ [[LOOP_ACC_NEXT:%.*]], [[GUARDED:%.*]] ], [ 0, [[LOOP_PREHEADER]] ] +; CHECK-NEXT: [[I:%.*]] = phi i64 [ [[I_NEXT:%.*]], [[GUARDED]] ], [ 0, [[LOOP_PREHEADER]] ] +; CHECK-NEXT: [[J:%.*]] = phi i32 [ [[J_NEXT:%.*]], [[GUARDED]] ], [ [[J_START]], [[LOOP_PREHEADER]] ] +; CHECK-NEXT: [[WITHIN_BOUNDS:%.*]] = icmp ult i32 [[J]], [[LENGTH]] +; CHECK-NEXT: br i1 [[WITHIN_BOUNDS]], label [[GUARDED]], label [[DEOPT:%.*]], !prof !0 +; CHECK: deopt: +; CHECK-NEXT: call void @prevent_merging() +; CHECK-NEXT: ret i32 -1 +; CHECK: guarded: +; CHECK-NEXT: [[ARRAY_I_PTR:%.*]] = getelementptr inbounds i32, i32* [[ARRAY:%.*]], i64 [[I]] +; CHECK-NEXT: [[ARRAY_I:%.*]] = load i32, i32* [[ARRAY_I_PTR]], align 4 +; CHECK-NEXT: [[LOOP_ACC_NEXT]] = add i32 [[LOOP_ACC]], [[ARRAY_I]] +; CHECK-NEXT: [[I_NEXT]] = add nuw nsw i64 [[I]], 1 +; CHECK-NEXT: [[J_NEXT]] = sub nuw i32 [[J]], 1 +; CHECK-NEXT: [[CONTINUE:%.*]] = icmp ult i64 [[I_NEXT]], [[N64]] +; CHECK-NEXT: br i1 [[CONTINUE]], label [[LOOP]], label [[EXIT_LOOPEXIT:%.*]] +; CHECK: exit.loopexit: +; CHECK-NEXT: [[LOOP_ACC_NEXT_LCSSA:%.*]] = phi i32 [ [[LOOP_ACC_NEXT]], [[GUARDED]] ] +; CHECK-NEXT: br label [[EXIT]] +; CHECK: exit: +; CHECK-NEXT: [[RESULT:%.*]] = phi i32 [ 0, [[ENTRY:%.*]] ], [ [[LOOP_ACC_NEXT_LCSSA]], [[EXIT_LOOPEXIT]] ] +; CHECK-NEXT: ret i32 [[RESULT]] +; +entry: + %pos_length = icmp sgt i32 %length, 0 + br i1 %pos_length, label %loop.preheader, label %exit + +loop.preheader: + %j.start = sub nuw nsw i32 %length, 1 + %n64 = zext i32 %n to i64 + br label %loop + +loop: + %loop.acc = phi i32 [ %loop.acc.next, %guarded ], [ 0, %loop.preheader ] + %i = phi i64 [ %i.next, %guarded ], [ 0, %loop.preheader ] + %j = phi i32 [ %j.next, %guarded ], [ %j.start, %loop.preheader ] + %within.bounds = icmp ult i32 %j, %length + br i1 %within.bounds, label %guarded, label %deopt, !prof !0 + +deopt: + call void @prevent_merging() + ret i32 -1 + +guarded: + %array.i.ptr = getelementptr inbounds i32, i32* %array, i64 %i + %array.i = load i32, i32* %array.i.ptr, align 4 + %loop.acc.next = add i32 %loop.acc, %array.i + %i.next = add nuw i64 %i, 1 + %j.next = sub nuw i32 %j, 1 + %continue = icmp ult i64 %i.next, %n64 + br i1 %continue, label %loop, label %exit + +exit: + %result = phi i32 [ %loop.acc.next, %guarded ], [0, %entry] + ret i32 %result +} + + + +declare i32 @llvm.experimental.deoptimize.i32(...) + +!0 = !{!"branch_weights", i32 1048576, i32 1} +!1 = !{i32 1, i32 -2147483648} +!2 = !{i32 0, i32 50} |