diff options
| author | Justin Lebar <jlebar@google.com> | 2016-03-14 23:15:34 +0000 |
|---|---|---|
| committer | Justin Lebar <jlebar@google.com> | 2016-03-14 23:15:34 +0000 |
| commit | 6827de19b2aedfa8679aa85e74de0989d4559d7c (patch) | |
| tree | 322c59afe8f60e30acc6336b5947302a2778f90c /llvm/test/Transforms/LoopUnroll/convergent.ll | |
| parent | fa99425667e4e12f453ed791f1638154a9195609 (diff) | |
| download | bcm5719-llvm-6827de19b2aedfa8679aa85e74de0989d4559d7c.tar.gz bcm5719-llvm-6827de19b2aedfa8679aa85e74de0989d4559d7c.zip | |
[LoopUnroll] Respect the convergent attribute.
Summary:
Specifically, when we perform runtime loop unrolling of a loop that
contains a convergent op, we can only unroll k times, where k divides
the loop trip multiple.
Without this change, we'll happily unroll e.g. the following loop
for (int i = 0; i < N; ++i) {
if (i == 0) convergent_op();
foo();
}
into
int i = 0;
if (N % 2 == 1) {
convergent_op();
foo();
++i;
}
for (; i < N - 1; i += 2) {
if (i == 0) convergent_op();
foo();
foo();
}.
This is unsafe, because we've just added a control-flow dependency to
the convergent op in the prelude.
In general, runtime unrolling loops that contain convergent ops is safe
only if we don't have to emit a prelude, which occurs when the unroll count
divides the trip multiple.
Reviewers: resistor
Subscribers: llvm-commits, mzolotukhin
Differential Revision: http://reviews.llvm.org/D17526
llvm-svn: 263509
Diffstat (limited to 'llvm/test/Transforms/LoopUnroll/convergent.ll')
| -rw-r--r-- | llvm/test/Transforms/LoopUnroll/convergent.ll | 83 |
1 files changed, 83 insertions, 0 deletions
diff --git a/llvm/test/Transforms/LoopUnroll/convergent.ll b/llvm/test/Transforms/LoopUnroll/convergent.ll new file mode 100644 index 00000000000..4109e961bf0 --- /dev/null +++ b/llvm/test/Transforms/LoopUnroll/convergent.ll @@ -0,0 +1,83 @@ +; RUN: opt < %s -loop-unroll -unroll-runtime -unroll-allow-partial -S | FileCheck %s + +declare void @f() convergent + +; Although this loop contains a convergent instruction, it should be +; fully unrolled. +; +; CHECK-LABEL: @full_unroll( +define i32 @full_unroll() { +entry: + br label %l3 + +l3: + %x.0 = phi i32 [ 0, %entry ], [ %inc, %l3 ] +; CHECK: call void @f() +; CHECK: call void @f() +; CHECK: call void @f() +; CHECK-NOT: call void @f() + call void @f() ;convergent + %inc = add nsw i32 %x.0, 1 + %exitcond = icmp eq i32 %inc, 3 + br i1 %exitcond, label %exit, label %l3 + +exit: + ret i32 0 +} + +; This loop contains a convergent instruction, but it should be partially +; unrolled. The unroll count is the largest power of 2 that divides the +; multiple -- 4, in this case. +; +; CHECK-LABEL: @runtime_unroll( +define i32 @runtime_unroll(i32 %n) { +entry: + %loop_ctl = mul nsw i32 %n, 12 + br label %l3 + +l3: + %x.0 = phi i32 [ 0, %entry ], [ %inc, %l3 ] +; CHECK: call void @f() +; CHECK: call void @f() +; CHECK: call void @f() +; CHECK: call void @f() +; CHECK-NOT: call void @f() + call void @f() convergent + %inc = add nsw i32 %x.0, 1 + %exitcond = icmp eq i32 %inc, %loop_ctl + br i1 %exitcond, label %exit, label %l3 + +exit: + ret i32 0 +} + +; This loop contains a convergent instruction, so its partial unroll +; count must divide its trip multiple. This overrides its unroll +; pragma -- we unroll exactly 8 times, even though 16 is requested. 
+; CHECK-LABEL: @pragma_unroll +define i32 @pragma_unroll(i32 %n) { +entry: + %loop_ctl = mul nsw i32 %n, 24 + br label %l3, !llvm.loop !0 + +l3: + %x.0 = phi i32 [ 0, %entry ], [ %inc, %l3 ] +; CHECK: call void @f() +; CHECK: call void @f() +; CHECK: call void @f() +; CHECK: call void @f() +; CHECK: call void @f() +; CHECK: call void @f() +; CHECK: call void @f() +; CHECK: call void @f() +; CHECK-NOT: call void @f() + call void @f() convergent + %inc = add nsw i32 %x.0, 1 + %exitcond = icmp eq i32 %inc, %loop_ctl + br i1 %exitcond, label %exit, label %l3, !llvm.loop !0 + +exit: + ret i32 0 +} + +!0 = !{!0, !{!"llvm.loop.unroll.count", i32 16}} |

