diff options
| author | Artur Pilipenko <apilipenko@azulsystems.com> | 2017-09-22 13:13:57 +0000 |
|---|---|---|
| committer | Artur Pilipenko <apilipenko@azulsystems.com> | 2017-09-22 13:13:57 +0000 |
| commit | 889dc1e3a58cb9c2e38d49138f6e338242315de3 (patch) | |
| tree | c556411a1f4880e56c775e087646fff01ab70edd /llvm/test/Transforms/LoopPredication | |
| parent | cea42b7fff23a7637a641cb62666ca0756b9b81d (diff) | |
| download | bcm5719-llvm-889dc1e3a58cb9c2e38d49138f6e338242315de3.tar.gz bcm5719-llvm-889dc1e3a58cb9c2e38d49138f6e338242315de3.zip | |
Rework loop predication pass
We've found a serious issue with the current implementation of loop predication.
The current implementation relies on SCEV, and this turned out to be problematic.
To fix the problem we had to rework the pass substantially. We have had the
reworked implementation in our downstream tree for a while. This is the initial
patch of the series of changes to upstream the new implementation.
For now the transformation is limited to the case where all of the following hold:
* The loop has a single latch whose condition is an icmp with either the ult or the slt predicate.
* The step of the IV used in the latch condition is 1.
* The IV of the latch condition is the same as the post-increment IV of the guard condition.
* The guard condition is ult.
See the review or the LoopPredication.cpp header for the details about the
problem and the new implementation.
Reviewed By: sanjoy, mkazantsev
Differential Revision: https://reviews.llvm.org/D37569
llvm-svn: 313981
Diffstat (limited to 'llvm/test/Transforms/LoopPredication')
| -rw-r--r-- | llvm/test/Transforms/LoopPredication/basic.ll | 344 | ||||
| -rw-r--r-- | llvm/test/Transforms/LoopPredication/nested.ll | 83 | ||||
| -rw-r--r-- | llvm/test/Transforms/LoopPredication/visited.ll | 5 |
3 files changed, 286 insertions, 146 deletions
diff --git a/llvm/test/Transforms/LoopPredication/basic.ll b/llvm/test/Transforms/LoopPredication/basic.ll index 6ce07819cb0..a4b4e742a10 100644 --- a/llvm/test/Transforms/LoopPredication/basic.ll +++ b/llvm/test/Transforms/LoopPredication/basic.ll @@ -11,8 +11,9 @@ entry: loop.preheader: ; CHECK: loop.preheader: -; CHECK: [[max_index:[^ ]+]] = add i32 %n, -1 -; CHECK-NEXT: [[wide_cond:[^ ]+]] = icmp ult i32 [[max_index]], %length +; CHECK: [[first_iteration_check:[^ ]+]] = icmp ult i32 0, %length +; CHECK-NEXT: [[limit_check:[^ ]+]] = icmp ule i32 %n, %length +; CHECK-NEXT: [[wide_cond:[^ ]+]] = and i1 [[first_iteration_check]], [[limit_check]] ; CHECK-NEXT: br label %loop br label %loop @@ -46,8 +47,9 @@ entry: loop.preheader: ; CHECK: loop.preheader: -; CHECK: [[max_index:[^ ]+]] = add i32 %n, -1 -; CHECK-NEXT: [[wide_cond:[^ ]+]] = icmp ult i32 [[max_index]], %length +; CHECK: [[first_iteration_check:[^ ]+]] = icmp ult i32 0, %length +; CHECK-NEXT: [[limit_check:[^ ]+]] = icmp ule i32 %n, %length +; CHECK-NEXT: [[wide_cond:[^ ]+]] = and i1 [[first_iteration_check]], [[limit_check]] ; CHECK-NEXT: br label %loop br label %loop @@ -73,44 +75,35 @@ exit: ret i32 %result } - -define i32 @two_range_checks(i32* %array.1, i32 %length.1, - i32* %array.2, i32 %length.2, i32 %n) { -; CHECK-LABEL: @two_range_checks +define i32 @signed_loop_0_to_n_ult_check(i32* %array, i32 %length, i32 %n) { +; CHECK-LABEL: @signed_loop_0_to_n_ult_check entry: - %tmp5 = icmp eq i32 %n, 0 + %tmp5 = icmp sle i32 %n, 0 br i1 %tmp5, label %exit, label %loop.preheader loop.preheader: ; CHECK: loop.preheader: -; CHECK: [[max_index:[^ ]+]] = add i32 %n, -1 -; CHECK-NEXT: [[wide_cond_1:[^ ]+]] = icmp ult i32 [[max_index]], %length.{{1|2}} -; CHECK-NEXT: [[wide_cond_2:[^ ]+]] = icmp ult i32 [[max_index]], %length.{{1|2}} +; CHECK: [[first_iteration_check:[^ ]+]] = icmp ult i32 0, %length +; CHECK-NEXT: [[limit_check:[^ ]+]] = icmp sle i32 %n, %length +; CHECK-NEXT: [[wide_cond:[^ ]+]] = and i1 
[[first_iteration_check]], [[limit_check]] ; CHECK-NEXT: br label %loop br label %loop loop: ; CHECK: loop: -; CHECK: [[wide_cond:[^ ]+]] = and i1 [[wide_cond_1]], [[wide_cond_2]] ; CHECK: call void (i1, ...) @llvm.experimental.guard(i1 [[wide_cond]], i32 9) [ "deopt"() ] %loop.acc = phi i32 [ %loop.acc.next, %loop ], [ 0, %loop.preheader ] %i = phi i32 [ %i.next, %loop ], [ 0, %loop.preheader ] - %within.bounds.1 = icmp ult i32 %i, %length.1 - %within.bounds.2 = icmp ult i32 %i, %length.2 - %within.bounds = and i1 %within.bounds.1, %within.bounds.2 + %within.bounds = icmp ult i32 %i, %length call void (i1, ...) @llvm.experimental.guard(i1 %within.bounds, i32 9) [ "deopt"() ] %i.i64 = zext i32 %i to i64 - %array.1.i.ptr = getelementptr inbounds i32, i32* %array.1, i64 %i.i64 - %array.1.i = load i32, i32* %array.1.i.ptr, align 4 - %loop.acc.1 = add i32 %loop.acc, %array.1.i - - %array.2.i.ptr = getelementptr inbounds i32, i32* %array.2, i64 %i.i64 - %array.2.i = load i32, i32* %array.2.i.ptr, align 4 - %loop.acc.next = add i32 %loop.acc.1, %array.2.i + %array.i.ptr = getelementptr inbounds i32, i32* %array, i64 %i.i64 + %array.i = load i32, i32* %array.i.ptr, align 4 + %loop.acc.next = add i32 %loop.acc, %array.i %i.next = add nuw i32 %i, 1 - %continue = icmp ult i32 %i.next, %n + %continue = icmp slt i32 %i.next, %n br i1 %continue, label %loop, label %exit exit: @@ -118,52 +111,33 @@ exit: ret i32 %result } -define i32 @three_range_checks(i32* %array.1, i32 %length.1, - i32* %array.2, i32 %length.2, - i32* %array.3, i32 %length.3, i32 %n) { -; CHECK-LABEL: @three_range_checks +define i32 @unsupported_latch_pred_loop_0_to_n(i32* %array, i32 %length, i32 %n) { +; CHECK-LABEL: @unsupported_latch_pred_loop_0_to_n entry: - %tmp5 = icmp eq i32 %n, 0 + %tmp5 = icmp sle i32 %n, 0 br i1 %tmp5, label %exit, label %loop.preheader loop.preheader: ; CHECK: loop.preheader: -; CHECK: [[max_index:[^ ]+]] = add i32 %n, -1 -; CHECK-NEXT: [[wide_cond_1:[^ ]+]] = icmp ult i32 
[[max_index]], %length.{{1|2|3}} -; CHECK-NEXT: [[wide_cond_2:[^ ]+]] = icmp ult i32 [[max_index]], %length.{{1|2|3}} -; CHECK-NEXT: [[wide_cond_3:[^ ]+]] = icmp ult i32 [[max_index]], %length.{{1|2|3}} ; CHECK-NEXT: br label %loop br label %loop loop: ; CHECK: loop: -; CHECK: [[wide_cond_and:[^ ]+]] = and i1 [[wide_cond_1]], [[wide_cond_2]] -; CHECK-NEXT: [[wide_cond:[^ ]+]] = and i1 [[wide_cond_and]], [[wide_cond_3]] -; CHECK: call void (i1, ...) @llvm.experimental.guard(i1 [[wide_cond]], i32 9) [ "deopt"() ] +; CHECK: %within.bounds = icmp ult i32 %i, %length +; CHECK-NEXT: call void (i1, ...) @llvm.experimental.guard(i1 %within.bounds, i32 9) [ "deopt"() ] %loop.acc = phi i32 [ %loop.acc.next, %loop ], [ 0, %loop.preheader ] %i = phi i32 [ %i.next, %loop ], [ 0, %loop.preheader ] - %within.bounds.1 = icmp ult i32 %i, %length.1 - %within.bounds.2 = icmp ult i32 %i, %length.2 - %within.bounds.3 = icmp ult i32 %i, %length.3 - %within.bounds.1.and.2 = and i1 %within.bounds.1, %within.bounds.2 - %within.bounds = and i1 %within.bounds.1.and.2, %within.bounds.3 + %within.bounds = icmp ult i32 %i, %length call void (i1, ...) 
@llvm.experimental.guard(i1 %within.bounds, i32 9) [ "deopt"() ] %i.i64 = zext i32 %i to i64 - %array.1.i.ptr = getelementptr inbounds i32, i32* %array.1, i64 %i.i64 - %array.1.i = load i32, i32* %array.1.i.ptr, align 4 - %loop.acc.1 = add i32 %loop.acc, %array.1.i - - %array.2.i.ptr = getelementptr inbounds i32, i32* %array.2, i64 %i.i64 - %array.2.i = load i32, i32* %array.2.i.ptr, align 4 - %loop.acc.2 = add i32 %loop.acc.1, %array.2.i - - %array.3.i.ptr = getelementptr inbounds i32, i32* %array.3, i64 %i.i64 - %array.3.i = load i32, i32* %array.3.i.ptr, align 4 - %loop.acc.next = add i32 %loop.acc.2, %array.3.i + %array.i.ptr = getelementptr inbounds i32, i32* %array, i64 %i.i64 + %array.i = load i32, i32* %array.i.ptr, align 4 + %loop.acc.next = add i32 %loop.acc, %array.i - %i.next = add nuw i32 %i, 1 - %continue = icmp ult i32 %i.next, %n + %i.next = add nsw i32 %i, 1 + %continue = icmp ne i32 %i.next, %n br i1 %continue, label %loop, label %exit exit: @@ -171,56 +145,33 @@ exit: ret i32 %result } -define i32 @three_guards(i32* %array.1, i32 %length.1, - i32* %array.2, i32 %length.2, - i32* %array.3, i32 %length.3, i32 %n) { -; CHECK-LABEL: @three_guards +define i32 @signed_loop_0_to_n_unsupported_iv_step(i32* %array, i32 %length, i32 %n) { +; CHECK-LABEL: @signed_loop_0_to_n_unsupported_iv_step entry: - %tmp5 = icmp eq i32 %n, 0 + %tmp5 = icmp sle i32 %n, 0 br i1 %tmp5, label %exit, label %loop.preheader loop.preheader: ; CHECK: loop.preheader: -; CHECK: [[max_index:[^ ]+]] = add i32 %n, -1 -; CHECK-NEXT: [[wide_cond_1:[^ ]+]] = icmp ult i32 [[max_index]], %length.1 -; CHECK-NEXT: [[wide_cond_2:[^ ]+]] = icmp ult i32 [[max_index]], %length.2 -; CHECK-NEXT: [[wide_cond_3:[^ ]+]] = icmp ult i32 [[max_index]], %length.3 ; CHECK-NEXT: br label %loop br label %loop loop: ; CHECK: loop: -; CHECK: call void (i1, ...) @llvm.experimental.guard(i1 [[wide_cond_1]], i32 9) [ "deopt"() ] -; CHECK: call void (i1, ...) 
@llvm.experimental.guard(i1 [[wide_cond_2]], i32 9) [ "deopt"() ] -; CHECK: call void (i1, ...) @llvm.experimental.guard(i1 [[wide_cond_3]], i32 9) [ "deopt"() ] - +; CHECK: %within.bounds = icmp ult i32 %i, %length +; CHECK-NEXT: call void (i1, ...) @llvm.experimental.guard(i1 %within.bounds, i32 9) [ "deopt"() ] %loop.acc = phi i32 [ %loop.acc.next, %loop ], [ 0, %loop.preheader ] %i = phi i32 [ %i.next, %loop ], [ 0, %loop.preheader ] - - %within.bounds.1 = icmp ult i32 %i, %length.1 - call void (i1, ...) @llvm.experimental.guard(i1 %within.bounds.1, i32 9) [ "deopt"() ] + %within.bounds = icmp ult i32 %i, %length + call void (i1, ...) @llvm.experimental.guard(i1 %within.bounds, i32 9) [ "deopt"() ] %i.i64 = zext i32 %i to i64 - %array.1.i.ptr = getelementptr inbounds i32, i32* %array.1, i64 %i.i64 - %array.1.i = load i32, i32* %array.1.i.ptr, align 4 - %loop.acc.1 = add i32 %loop.acc, %array.1.i - - %within.bounds.2 = icmp ult i32 %i, %length.2 - call void (i1, ...) @llvm.experimental.guard(i1 %within.bounds.2, i32 9) [ "deopt"() ] - - %array.2.i.ptr = getelementptr inbounds i32, i32* %array.2, i64 %i.i64 - %array.2.i = load i32, i32* %array.2.i.ptr, align 4 - %loop.acc.2 = add i32 %loop.acc.1, %array.2.i - - %within.bounds.3 = icmp ult i32 %i, %length.3 - call void (i1, ...) 
@llvm.experimental.guard(i1 %within.bounds.3, i32 9) [ "deopt"() ] - - %array.3.i.ptr = getelementptr inbounds i32, i32* %array.3, i64 %i.i64 - %array.3.i = load i32, i32* %array.3.i.ptr, align 4 - %loop.acc.next = add i32 %loop.acc.2, %array.3.i + %array.i.ptr = getelementptr inbounds i32, i32* %array, i64 %i.i64 + %array.i = load i32, i32* %array.i.ptr, align 4 + %loop.acc.next = add i32 %loop.acc, %array.i - %i.next = add nuw i32 %i, 1 - %continue = icmp ult i32 %i.next, %n + %i.next = add nsw i32 %i, 2 + %continue = icmp slt i32 %i.next, %n br i1 %continue, label %loop, label %exit exit: @@ -228,15 +179,17 @@ exit: ret i32 %result } -define i32 @signed_loop_start_to_n_sge_0_check(i32* %array, i32 %length, i32 %start, i32 %n) { -; CHECK-LABEL: @signed_loop_start_to_n_sge_0_check +define i32 @signed_loop_0_to_n_equal_iv_range_check(i32* %array, i32 %length, i32 %n) { +; CHECK-LABEL: @signed_loop_0_to_n_equal_iv_range_check entry: - %tmp5 = icmp eq i32 %n, 0 + %tmp5 = icmp sle i32 %n, 0 br i1 %tmp5, label %exit, label %loop.preheader loop.preheader: ; CHECK: loop.preheader: -; CHECK-NEXT: [[wide_cond:[^ ]+]] = icmp sge i32 %start, 0 +; CHECK: [[first_iteration_check:[^ ]+]] = icmp ult i32 0, %length +; CHECK-NEXT: [[limit_check:[^ ]+]] = icmp sle i32 %n, %length +; CHECK-NEXT: [[wide_cond:[^ ]+]] = and i1 [[first_iteration_check]], [[limit_check]] ; CHECK-NEXT: br label %loop br label %loop @@ -244,8 +197,10 @@ loop: ; CHECK: loop: ; CHECK: call void (i1, ...) @llvm.experimental.guard(i1 [[wide_cond]], i32 9) [ "deopt"() ] %loop.acc = phi i32 [ %loop.acc.next, %loop ], [ 0, %loop.preheader ] - %i = phi i32 [ %i.next, %loop ], [ %start, %loop.preheader ] - %within.bounds = icmp sge i32 %i, 0 + %i = phi i32 [ %i.next, %loop ], [ 0, %loop.preheader ] + %j = phi i32 [ %j.next, %loop ], [ 0, %loop.preheader ] + + %within.bounds = icmp ult i32 %j, %length call void (i1, ...) 
@llvm.experimental.guard(i1 %within.bounds, i32 9) [ "deopt"() ] %i.i64 = zext i32 %i to i64 @@ -253,6 +208,7 @@ loop: %array.i = load i32, i32* %array.i.ptr, align 4 %loop.acc.next = add i32 %loop.acc, %array.i + %j.next = add nsw i32 %j, 1 %i.next = add nsw i32 %i, 1 %continue = icmp slt i32 %i.next, %n br i1 %continue, label %loop, label %exit @@ -262,28 +218,26 @@ exit: ret i32 %result } -define i32 @signed_loop_start_to_n_upper_slt_length_check(i32* %array, i32 %length, i32 %start, i32 %n) { -; CHECK-LABEL: @signed_loop_start_to_n_upper_slt_length_check +define i32 @signed_loop_0_to_n_unrelated_iv_range_check(i32* %array, i32 %start, i32 %length, i32 %n) { +; CHECK-LABEL: @signed_loop_0_to_n_unrelated_iv_range_check entry: %tmp5 = icmp sle i32 %n, 0 br i1 %tmp5, label %exit, label %loop.preheader loop.preheader: ; CHECK: loop.preheader: -; CHECK: [[start_1:[^ ]+]] = add i32 %start, 1 -; CHECK-NEXT: [[n_sgt_start_1:[^ ]+]] = icmp sgt i32 %n, [[start_1]] -; CHECK-NEXT: [[smax:[^ ]+]] = select i1 [[n_sgt_start_1]], i32 %n, i32 [[start_1]] -; CHECK-NEXT: [[max_index:[^ ]+]] = add i32 [[smax]], -1 -; CHECK-NEXT: [[wide_cond:[^ ]+]] = icmp slt i32 [[max_index]], %length ; CHECK-NEXT: br label %loop br label %loop loop: ; CHECK: loop: -; CHECK: call void (i1, ...) @llvm.experimental.guard(i1 [[wide_cond]], i32 9) [ "deopt"() ] +; CHECK: %within.bounds = icmp ult i32 %j, %length +; CHECK-NEXT: call void (i1, ...) @llvm.experimental.guard(i1 %within.bounds, i32 9) [ "deopt"() ] %loop.acc = phi i32 [ %loop.acc.next, %loop ], [ 0, %loop.preheader ] - %i = phi i32 [ %i.next, %loop ], [ %start, %loop.preheader ] - %within.bounds = icmp slt i32 %i, %length + %i = phi i32 [ %i.next, %loop ], [ 0, %loop.preheader ] + %j = phi i32 [ %j.next, %loop ], [ %start, %loop.preheader ] + + %within.bounds = icmp ult i32 %j, %length call void (i1, ...) 
@llvm.experimental.guard(i1 %within.bounds, i32 9) [ "deopt"() ] %i.i64 = zext i32 %i to i64 @@ -291,6 +245,7 @@ loop: %array.i = load i32, i32* %array.i.ptr, align 4 %loop.acc.next = add i32 %loop.acc, %array.i + %j.next = add nsw i32 %j, 1 %i.next = add nsw i32 %i, 1 %continue = icmp slt i32 %i.next, %n br i1 %continue, label %loop, label %exit @@ -300,41 +255,166 @@ exit: ret i32 %result } -define i32 @signed_loop_start_to_n_both_checks(i32* %array, i32 %length, i32 %start, i32 %n) { -; CHECK-LABEL: @signed_loop_start_to_n_both_checks +define i32 @two_range_checks(i32* %array.1, i32 %length.1, + i32* %array.2, i32 %length.2, i32 %n) { +; CHECK-LABEL: @two_range_checks entry: - %tmp5 = icmp sle i32 %n, 0 + %tmp5 = icmp eq i32 %n, 0 br i1 %tmp5, label %exit, label %loop.preheader loop.preheader: ; CHECK: loop.preheader: -; CHECK: [[lower_check:[^ ]+]] = icmp sge i32 %start, 0 -; CHECK-NEXT: [[start_1:[^ ]+]] = add i32 %start, 1 -; CHECK-NEXT: [[n_sgt_start_1:[^ ]+]] = icmp sgt i32 %n, [[start_1]] -; CHECK-NEXT: [[smax:[^ ]+]] = select i1 [[n_sgt_start_1]], i32 %n, i32 [[start_1]] -; CHECK-NEXT: [[max_index:[^ ]+]] = add i32 [[smax]], -1 -; CHECK-NEXT: [[upper_check:[^ ]+]] = icmp slt i32 [[max_index]], %length +; CHECK: [[first_iteration_check_1:[^ ]+]] = icmp ult i32 0, %length.{{1|2}} +; CHECK-NEXT: [[limit_check_1:[^ ]+]] = icmp ule i32 %n, %length.{{1|2}} +; CHECK-NEXT: [[wide_cond_1:[^ ]+]] = and i1 [[first_iteration_check_1]], [[limit_check_1]] +; CHECK-NEXT: [[first_iteration_check_2:[^ ]+]] = icmp ult i32 0, %length.{{1|2}} +; CHECK-NEXT: [[limit_check_2:[^ ]+]] = icmp ule i32 %n, %length.{{1|2}} +; CHECK-NEXT: [[wide_cond_2:[^ ]+]] = and i1 [[first_iteration_check_2]], [[limit_check_2]] ; CHECK-NEXT: br label %loop br label %loop loop: ; CHECK: loop: -; CHECK: [[wide_cond:[^ ]+]] = and i1 [[lower_check]], [[upper_check]] -; CHECK-NEXT: call void (i1, ...) 
@llvm.experimental.guard(i1 [[wide_cond]], i32 9) [ "deopt"() ] +; CHECK: [[wide_cond:[^ ]+]] = and i1 [[wide_cond_1]], [[wide_cond_2]] +; CHECK: call void (i1, ...) @llvm.experimental.guard(i1 [[wide_cond]], i32 9) [ "deopt"() ] %loop.acc = phi i32 [ %loop.acc.next, %loop ], [ 0, %loop.preheader ] - %i = phi i32 [ %i.next, %loop ], [ %start, %loop.preheader ] - %within.bounds.1 = icmp slt i32 %i, %length - %within.bounds.2 = icmp sge i32 %i, 0 + %i = phi i32 [ %i.next, %loop ], [ 0, %loop.preheader ] + %within.bounds.1 = icmp ult i32 %i, %length.1 + %within.bounds.2 = icmp ult i32 %i, %length.2 %within.bounds = and i1 %within.bounds.1, %within.bounds.2 call void (i1, ...) @llvm.experimental.guard(i1 %within.bounds, i32 9) [ "deopt"() ] %i.i64 = zext i32 %i to i64 - %array.i.ptr = getelementptr inbounds i32, i32* %array, i64 %i.i64 - %array.i = load i32, i32* %array.i.ptr, align 4 - %loop.acc.next = add i32 %loop.acc, %array.i + %array.1.i.ptr = getelementptr inbounds i32, i32* %array.1, i64 %i.i64 + %array.1.i = load i32, i32* %array.1.i.ptr, align 4 + %loop.acc.1 = add i32 %loop.acc, %array.1.i - %i.next = add nsw i32 %i, 1 - %continue = icmp slt i32 %i.next, %n + %array.2.i.ptr = getelementptr inbounds i32, i32* %array.2, i64 %i.i64 + %array.2.i = load i32, i32* %array.2.i.ptr, align 4 + %loop.acc.next = add i32 %loop.acc.1, %array.2.i + + %i.next = add nuw i32 %i, 1 + %continue = icmp ult i32 %i.next, %n + br i1 %continue, label %loop, label %exit + +exit: + %result = phi i32 [ 0, %entry ], [ %loop.acc.next, %loop ] + ret i32 %result +} + +define i32 @three_range_checks(i32* %array.1, i32 %length.1, + i32* %array.2, i32 %length.2, + i32* %array.3, i32 %length.3, i32 %n) { +; CHECK-LABEL: @three_range_checks +entry: + %tmp5 = icmp eq i32 %n, 0 + br i1 %tmp5, label %exit, label %loop.preheader + +loop.preheader: +; CHECK: loop.preheader: +; CHECK: [[first_iteration_check_1:[^ ]+]] = icmp ult i32 0, %length.{{1|2|3}} +; CHECK-NEXT: [[limit_check_1:[^ ]+]] = icmp 
ule i32 %n, %length.{{1|2|3}} +; CHECK-NEXT: [[wide_cond_1:[^ ]+]] = and i1 [[first_iteration_check_1]], [[limit_check_1]] +; CHECK-NEXT: [[first_iteration_check_2:[^ ]+]] = icmp ult i32 0, %length.{{1|2|3}} +; CHECK-NEXT: [[limit_check_2:[^ ]+]] = icmp ule i32 %n, %length.{{1|2|3}} +; CHECK-NEXT: [[wide_cond_2:[^ ]+]] = and i1 [[first_iteration_check_2]], [[limit_check_2]] +; CHECK-NEXT: [[first_iteration_check_3:[^ ]+]] = icmp ult i32 0, %length.{{1|2|3}} +; CHECK-NEXT: [[limit_check_3:[^ ]+]] = icmp ule i32 %n, %length.{{1|2|3}} +; CHECK-NEXT: [[wide_cond_3:[^ ]+]] = and i1 [[first_iteration_check_3]], [[limit_check_3]] +; CHECK-NEXT: br label %loop + br label %loop + +loop: +; CHECK: loop: +; CHECK: [[wide_cond_and:[^ ]+]] = and i1 [[wide_cond_1]], [[wide_cond_2]] +; CHECK-NEXT: [[wide_cond:[^ ]+]] = and i1 [[wide_cond_and]], [[wide_cond_3]] +; CHECK: call void (i1, ...) @llvm.experimental.guard(i1 [[wide_cond]], i32 9) [ "deopt"() ] + %loop.acc = phi i32 [ %loop.acc.next, %loop ], [ 0, %loop.preheader ] + %i = phi i32 [ %i.next, %loop ], [ 0, %loop.preheader ] + %within.bounds.1 = icmp ult i32 %i, %length.1 + %within.bounds.2 = icmp ult i32 %i, %length.2 + %within.bounds.3 = icmp ult i32 %i, %length.3 + %within.bounds.1.and.2 = and i1 %within.bounds.1, %within.bounds.2 + %within.bounds = and i1 %within.bounds.1.and.2, %within.bounds.3 + call void (i1, ...) 
@llvm.experimental.guard(i1 %within.bounds, i32 9) [ "deopt"() ] + + %i.i64 = zext i32 %i to i64 + %array.1.i.ptr = getelementptr inbounds i32, i32* %array.1, i64 %i.i64 + %array.1.i = load i32, i32* %array.1.i.ptr, align 4 + %loop.acc.1 = add i32 %loop.acc, %array.1.i + + %array.2.i.ptr = getelementptr inbounds i32, i32* %array.2, i64 %i.i64 + %array.2.i = load i32, i32* %array.2.i.ptr, align 4 + %loop.acc.2 = add i32 %loop.acc.1, %array.2.i + + %array.3.i.ptr = getelementptr inbounds i32, i32* %array.3, i64 %i.i64 + %array.3.i = load i32, i32* %array.3.i.ptr, align 4 + %loop.acc.next = add i32 %loop.acc.2, %array.3.i + + %i.next = add nuw i32 %i, 1 + %continue = icmp ult i32 %i.next, %n + br i1 %continue, label %loop, label %exit + +exit: + %result = phi i32 [ 0, %entry ], [ %loop.acc.next, %loop ] + ret i32 %result +} + +define i32 @three_guards(i32* %array.1, i32 %length.1, + i32* %array.2, i32 %length.2, + i32* %array.3, i32 %length.3, i32 %n) { +; CHECK-LABEL: @three_guards +entry: + %tmp5 = icmp eq i32 %n, 0 + br i1 %tmp5, label %exit, label %loop.preheader + +loop.preheader: +; CHECK: loop.preheader: +; CHECK: [[first_iteration_check_1:[^ ]+]] = icmp ult i32 0, %length.{{1|2|3}} +; CHECK-NEXT: [[limit_check_1:[^ ]+]] = icmp ule i32 %n, %length.{{1|2|3}} +; CHECK-NEXT: [[wide_cond_1:[^ ]+]] = and i1 [[first_iteration_check_1]], [[limit_check_1]] +; CHECK-NEXT: [[first_iteration_check_2:[^ ]+]] = icmp ult i32 0, %length.{{1|2|3}} +; CHECK-NEXT: [[limit_check_2:[^ ]+]] = icmp ule i32 %n, %length.{{1|2|3}} +; CHECK-NEXT: [[wide_cond_2:[^ ]+]] = and i1 [[first_iteration_check_2]], [[limit_check_2]] +; CHECK-NEXT: [[first_iteration_check_3:[^ ]+]] = icmp ult i32 0, %length.{{1|2|3}} +; CHECK-NEXT: [[limit_check_3:[^ ]+]] = icmp ule i32 %n, %length.{{1|2|3}} +; CHECK-NEXT: [[wide_cond_3:[^ ]+]] = and i1 [[first_iteration_check_3]], [[limit_check_3]] +; CHECK-NEXT: br label %loop + br label %loop + +loop: +; CHECK: loop: +; CHECK: call void (i1, ...) 
@llvm.experimental.guard(i1 [[wide_cond_1]], i32 9) [ "deopt"() ] +; CHECK: call void (i1, ...) @llvm.experimental.guard(i1 [[wide_cond_2]], i32 9) [ "deopt"() ] +; CHECK: call void (i1, ...) @llvm.experimental.guard(i1 [[wide_cond_3]], i32 9) [ "deopt"() ] + + %loop.acc = phi i32 [ %loop.acc.next, %loop ], [ 0, %loop.preheader ] + %i = phi i32 [ %i.next, %loop ], [ 0, %loop.preheader ] + + %within.bounds.1 = icmp ult i32 %i, %length.1 + call void (i1, ...) @llvm.experimental.guard(i1 %within.bounds.1, i32 9) [ "deopt"() ] + + %i.i64 = zext i32 %i to i64 + %array.1.i.ptr = getelementptr inbounds i32, i32* %array.1, i64 %i.i64 + %array.1.i = load i32, i32* %array.1.i.ptr, align 4 + %loop.acc.1 = add i32 %loop.acc, %array.1.i + + %within.bounds.2 = icmp ult i32 %i, %length.2 + call void (i1, ...) @llvm.experimental.guard(i1 %within.bounds.2, i32 9) [ "deopt"() ] + + %array.2.i.ptr = getelementptr inbounds i32, i32* %array.2, i64 %i.i64 + %array.2.i = load i32, i32* %array.2.i.ptr, align 4 + %loop.acc.2 = add i32 %loop.acc.1, %array.2.i + + %within.bounds.3 = icmp ult i32 %i, %length.3 + call void (i1, ...) 
@llvm.experimental.guard(i1 %within.bounds.3, i32 9) [ "deopt"() ] + + %array.3.i.ptr = getelementptr inbounds i32, i32* %array.3, i64 %i.i64 + %array.3.i = load i32, i32* %array.3.i.ptr, align 4 + %loop.acc.next = add i32 %loop.acc.2, %array.3.i + + %i.next = add nuw i32 %i, 1 + %continue = icmp ult i32 %i.next, %n br i1 %continue, label %loop, label %exit exit: @@ -350,8 +430,9 @@ entry: loop.preheader: ; CHECK: loop.preheader: -; CHECK: [[max_index:[^ ]+]] = add i32 %n, -1 -; CHECK-NEXT: [[wide_cond:[^ ]+]] = icmp ult i32 [[max_index]], %length +; CHECK: [[first_iteration_check:[^ ]+]] = icmp ult i32 0, %length +; CHECK-NEXT: [[limit_check:[^ ]+]] = icmp ule i32 %n, %length +; CHECK-NEXT: [[wide_cond:[^ ]+]] = and i1 [[first_iteration_check]], [[limit_check]] ; CHECK-NEXT: br label %loop br label %loop @@ -439,12 +520,12 @@ loop.preheader: loop: ; CHECK: loop: ; CHECK: %bound = add i32 %i, %x -; CHECK-NEXT: %within.bounds = icmp slt i32 %i, %bound +; CHECK-NEXT: %within.bounds = icmp ult i32 %i, %bound ; CHECK-NEXT: call void (i1, ...) @llvm.experimental.guard(i1 %within.bounds, i32 9) [ "deopt"() ] %loop.acc = phi i32 [ %loop.acc.next, %loop ], [ 0, %loop.preheader ] %i = phi i32 [ %i.next, %loop ], [ %start, %loop.preheader ] %bound = add i32 %i, %x - %within.bounds = icmp slt i32 %i, %bound + %within.bounds = icmp ult i32 %i, %bound call void (i1, ...) 
@llvm.experimental.guard(i1 %within.bounds, i32 9) [ "deopt"() ] %i.i64 = zext i32 %i to i64 @@ -503,9 +584,10 @@ entry: loop.preheader: ; CHECK: loop.preheader: -; CHECK: [[max_index:[^ ]+]] = add i32 %n, -1 -; CHECK-NEXT: [[length:[^ ]+]] = zext i16 %length.i16 to i32 -; CHECK-NEXT: [[wide_cond:[^ ]+]] = icmp ult i32 [[max_index]], [[length]] +; CHECK: [[length:[^ ]+]] = zext i16 %length.i16 to i32 +; CHECK-NEXT: [[first_iteration_check:[^ ]+]] = icmp ult i32 0, [[length]] +; CHECK-NEXT: [[limit_check:[^ ]+]] = icmp ule i32 %n, [[length]] +; CHECK-NEXT: [[wide_cond:[^ ]+]] = and i1 [[first_iteration_check]], [[limit_check]] ; CHECK-NEXT: br label %loop br label %loop diff --git a/llvm/test/Transforms/LoopPredication/nested.ll b/llvm/test/Transforms/LoopPredication/nested.ll index 6b40cde3e57..796839feec8 100644 --- a/llvm/test/Transforms/LoopPredication/nested.ll +++ b/llvm/test/Transforms/LoopPredication/nested.ll @@ -10,8 +10,6 @@ entry: br i1 %tmp5, label %exit, label %outer.loop.preheader outer.loop.preheader: -; CHECK: outer.loop.preheader: -; CHECK: [[iteration_count:[^ ]+]] = add i32 %l, -1 br label %outer.loop outer.loop: @@ -22,7 +20,10 @@ outer.loop: inner.loop.preheader: ; CHECK: inner.loop.preheader: -; CHECK: [[wide_cond:[^ ]+]] = icmp slt i32 [[iteration_count]], %length +; CHECK: [[first_iteration_check:[^ ]+]] = icmp ult i32 0, %length +; CHECK-NEXT: [[limit_check:[^ ]+]] = icmp sle i32 %l, %length +; CHECK-NEXT: [[wide_cond:[^ ]+]] = and i1 [[first_iteration_check]], [[limit_check]] +; CHECK-NEXT: br label %inner.loop br label %inner.loop inner.loop: @@ -31,7 +32,7 @@ inner.loop: %inner.loop.acc = phi i32 [ %inner.loop.acc.next, %inner.loop ], [ %outer.loop.acc, %inner.loop.preheader ] %j = phi i32 [ %j.next, %inner.loop ], [ 0, %inner.loop.preheader ] - %within.bounds = icmp slt i32 %j, %length + %within.bounds = icmp ult i32 %j, %length call void (i1, ...) 
@llvm.experimental.guard(i1 %within.bounds, i32 9) [ "deopt"() ] %j.i64 = zext i32 %j to i64 @@ -62,8 +63,10 @@ entry: outer.loop.preheader: ; CHECK: outer.loop.preheader: -; CHECK: [[iteration_count:[^ ]+]] = add i32 %n, -1 -; CHECK: [[wide_cond:[^ ]+]] = icmp slt i32 [[iteration_count]], %length +; CHECK: [[first_iteration_check:[^ ]+]] = icmp ult i32 0, %length +; CHECK-NEXT: [[limit_check:[^ ]+]] = icmp sle i32 %n, %length +; CHECK-NEXT: [[wide_cond:[^ ]+]] = and i1 [[first_iteration_check]], [[limit_check]] +; CHECK-NEXT: br label %outer.loop br label %outer.loop outer.loop: @@ -82,7 +85,7 @@ inner.loop: %inner.loop.acc = phi i32 [ %inner.loop.acc.next, %inner.loop ], [ %outer.loop.acc, %inner.loop.preheader ] %j = phi i32 [ %j.next, %inner.loop ], [ 0, %inner.loop.preheader ] - %within.bounds = icmp slt i32 %i, %length + %within.bounds = icmp ult i32 %i, %length call void (i1, ...) @llvm.experimental.guard(i1 %within.bounds, i32 9) [ "deopt"() ] %i.i64 = zext i32 %i to i64 @@ -112,14 +115,15 @@ entry: br i1 %tmp5, label %exit, label %outer.loop.preheader outer.loop.preheader: +; CHECK: outer.loop.preheader: +; CHECK-NEXT: [[first_iteration_check_outer:[^ ]+]] = icmp ult i32 0, %length +; CHECK-NEXT: [[limit_check_outer:[^ ]+]] = icmp sle i32 %n, %length +; CHECK-NEXT: [[wide_cond_outer:[^ ]+]] = and i1 [[first_iteration_check_outer]], [[limit_check_outer]] +; CHECK-NEXT: br label %outer.loop br label %outer.loop outer.loop: ; CHECK: outer.loop: -; CHECK: [[i_1:[^ ]+]] = add i32 %i, 1 -; CHECK-NEXT: [[l_sgt_i_1:[^ ]+]] = icmp sgt i32 %l, [[i_1]] -; CHECK-NEXT: [[smax:[^ ]+]] = select i1 [[l_sgt_i_1]], i32 %l, i32 [[i_1]] -; CHECK-NEXT: [[max_j:[^ ]+]] = add i32 [[smax]], -1 %outer.loop.acc = phi i32 [ %outer.loop.acc.next, %outer.loop.inc ], [ 0, %outer.loop.preheader ] %i = phi i32 [ %i.next, %outer.loop.inc ], [ 0, %outer.loop.preheader ] %tmp6 = icmp sle i32 %l, 0 @@ -127,16 +131,69 @@ outer.loop: inner.loop.preheader: ; CHECK: inner.loop.preheader: -; 
CHECK: [[wide_cond:[^ ]+]] = icmp slt i32 [[max_j]], %length +; CHECK: [[limit_check_inner:[^ ]+]] = icmp sle i32 %l, %length +; CHECK: br label %inner.loop br label %inner.loop inner.loop: ; CHECK: inner.loop: +; CHECK: [[wide_cond:[^ ]+]] = and i1 [[limit_check_inner]], [[wide_cond_outer]] ; CHECK: call void (i1, ...) @llvm.experimental.guard(i1 [[wide_cond]], i32 9) [ "deopt"() ] %inner.loop.acc = phi i32 [ %inner.loop.acc.next, %inner.loop ], [ %outer.loop.acc, %inner.loop.preheader ] %j = phi i32 [ %j.next, %inner.loop ], [ %i, %inner.loop.preheader ] - %within.bounds = icmp slt i32 %j, %length + %within.bounds = icmp ult i32 %j, %length + call void (i1, ...) @llvm.experimental.guard(i1 %within.bounds, i32 9) [ "deopt"() ] + + %j.i64 = zext i32 %j to i64 + %array.j.ptr = getelementptr inbounds i32, i32* %array, i64 %j.i64 + %array.j = load i32, i32* %array.j.ptr, align 4 + %inner.loop.acc.next = add i32 %inner.loop.acc, %array.j + + %j.next = add nsw i32 %j, 1 + %inner.continue = icmp slt i32 %j.next, %l + br i1 %inner.continue, label %inner.loop, label %outer.loop.inc + +outer.loop.inc: + %outer.loop.acc.next = phi i32 [ %inner.loop.acc.next, %inner.loop ], [ %outer.loop.acc, %outer.loop ] + %i.next = add nsw i32 %i, 1 + %outer.continue = icmp slt i32 %i.next, %n + br i1 %outer.continue, label %outer.loop, label %exit + +exit: + %result = phi i32 [ 0, %entry ], [ %outer.loop.acc.next, %outer.loop.inc ] + ret i32 %result +} + +define i32 @cant_expand_guard_check_start(i32* %array, i32 %length, i32 %n, i32 %l, i32 %maybezero) { +; CHECK-LABEL: @cant_expand_guard_check_start +entry: + %tmp5 = icmp sle i32 %n, 0 + br i1 %tmp5, label %exit, label %outer.loop.preheader + +outer.loop.preheader: + br label %outer.loop + +outer.loop: + %outer.loop.acc = phi i32 [ %outer.loop.acc.next, %outer.loop.inc ], [ 0, %outer.loop.preheader ] + %i = phi i32 [ %i.next, %outer.loop.inc ], [ 0, %outer.loop.preheader ] + %tmp6 = icmp sle i32 %l, 0 + %div = udiv i32 %i, %maybezero + 
br i1 %tmp6, label %outer.loop.inc, label %inner.loop.preheader + +inner.loop.preheader: +; CHECK: inner.loop.preheader: +; CHECK: br label %inner.loop + br label %inner.loop + +inner.loop: +; CHECK: inner.loop: +; CHECK: %within.bounds = icmp ult i32 %j, %length +; CHECK: call void (i1, ...) @llvm.experimental.guard(i1 %within.bounds, i32 9) [ "deopt"() ] + %inner.loop.acc = phi i32 [ %inner.loop.acc.next, %inner.loop ], [ %outer.loop.acc, %inner.loop.preheader ] + %j = phi i32 [ %j.next, %inner.loop ], [ %div, %inner.loop.preheader ] + + %within.bounds = icmp ult i32 %j, %length call void (i1, ...) @llvm.experimental.guard(i1 %within.bounds, i32 9) [ "deopt"() ] %j.i64 = zext i32 %j to i64 diff --git a/llvm/test/Transforms/LoopPredication/visited.ll b/llvm/test/Transforms/LoopPredication/visited.ll index e9aae77f8e6..01feaeabd16 100644 --- a/llvm/test/Transforms/LoopPredication/visited.ll +++ b/llvm/test/Transforms/LoopPredication/visited.ll @@ -11,8 +11,9 @@ entry: loop.preheader: ; CHECK: loop.preheader: -; CHECK: [[iteration_count:[^ ]+]] = add i32 %n, -1 -; CHECK-NEXT: [[wide_cond:[^ ]+]] = icmp ult i32 [[iteration_count]], %length +; CHECK: [[first_iteration_check:[^ ]+]] = icmp ult i32 0, %length +; CHECK-NEXT: [[limit_check:[^ ]+]] = icmp ule i32 %n, %length +; CHECK-NEXT: [[wide_cond:[^ ]+]] = and i1 [[first_iteration_check]], [[limit_check]] ; CHECK-NEXT: br label %loop br label %loop |

