[SimpleLoopUnswitch] adding cost multiplier to cap exponential unswitch with

We need to control exponential behavior of loop-unswitch so we do not get run-away compilation. Suggested solution is to introduce a multiplier for an unswitch cost that makes cost prohibitive as soon as there are too many candidates and too many sibling loops (meaning we have already started duplicating loops by unswitching). It does solve the currently known problem with compile-time degradation (PR 39544). Tests are built on top of a recently implemented CHECK-COUNT-<num> FileCheck directives. Reviewed By: chandlerc, mkazantsev Differential Revision: https://reviews.llvm.org/D54223 llvm-svn: 347097
author: Fedor Sergeev <fedor.sergeev@azul.com> 2018-11-16 21:16:43 +0000
committer: Fedor Sergeev <fedor.sergeev@azul.com> 2018-11-16 21:16:43 +0000
commit: 2e3e224e715eeb85ca9640e19ce14cd0bd4f3f6b (patch)
tree: 1703008da9443ad1291a6f2fc8ed5928d3ad144d /llvm/test/Transforms/SimpleLoopUnswitch/exponential-switch-unswitch.ll
parent: d1840e5383a9751c534fd3715ab24a497bc54295 (diff)
download: bcm5719-llvm-2e3e224e715eeb85ca9640e19ce14cd0bd4f3f6b.tar.gz
bcm5719-llvm-2e3e224e715eeb85ca9640e19ce14cd0bd4f3f6b.zip
1 files changed, 118 insertions, 0 deletions
diff --git a/llvm/test/Transforms/SimpleLoopUnswitch/exponential-switch-unswitch.ll b/llvm/test/Transforms/SimpleLoopUnswitch/exponential-switch-unswitch.ll
new file mode 100644
index 00000000000..c90b2cc5ebc
--- /dev/null
+++ b/llvm/test/Transforms/SimpleLoopUnswitch/exponential-switch-unswitch.ll
@@ -0,0 +1,118 @@
+;
+; Here we have 5-way unswitchable switch with each successor also having an unswitchable
+; exiting branch in it. If we start unswitching those branches we start duplicating the
+; whole switch. This can easily lead to exponential behavior w/o proper control.
+; On a real-life testcase there was 16-way switch and that took forever to compile w/o
+; a cost control.
+;
+;
+; When we use the stricted multiplier candidates formula (unscaled candidates == 0)
+; we should be getting just a single loop.
+;
+; RUN: opt < %s -enable-nontrivial-unswitch -enable-unswitch-cost-multiplier=true \
+; RUN:     -unswitch-num-initial-unscaled-candidates=0 -unswitch-siblings-toplevel-div=1 \
+; RUN:     -passes='loop(unswitch),print<loops>' -disable-output 2>&1 | FileCheck %s --check-prefixes=LOOP1
+;
+; RUN: opt < %s -enable-nontrivial-unswitch -enable-unswitch-cost-multiplier=true \
+; RUN:     -unswitch-num-initial-unscaled-candidates=0 -unswitch-siblings-toplevel-div=16 \
+; RUN:     -passes='loop(unswitch),print<loops>' -disable-output 2>&1 | FileCheck %s --check-prefixes=LOOP1
+;
+;
+; With relaxed candidates multiplier (unscaled candidates == 8) we should allow
+; some unswitches to happen until siblings multiplier starts kicking in:
+;
+; RUN: opt < %s -enable-nontrivial-unswitch -enable-unswitch-cost-multiplier=true \
+; RUN:     -unswitch-num-initial-unscaled-candidates=8 -unswitch-siblings-toplevel-div=1 \
+; RUN:     -passes='loop(unswitch),print<loops>' -disable-output 2>&1 | \
+; RUN:     sort -b | FileCheck %s --check-prefixes=LOOP-RELAX
+;
+; With relaxed candidates multiplier (unscaled candidates == 8) and with relaxed
+; siblings multiplier for top-level loops (toplevel-div == 8) we should get
+; considerably more copies of the loop (especially top-level ones).
+;
+; RUN: opt < %s -enable-nontrivial-unswitch -enable-unswitch-cost-multiplier=true \
+; RUN:     -unswitch-num-initial-unscaled-candidates=8 -unswitch-siblings-toplevel-div=8 \
+; RUN:     -passes='loop(unswitch),print<loops>' -disable-output 2>&1 | \
+; RUN:     sort -b | FileCheck %s --check-prefixes=LOOP-RELAX2
+;
+; We get hundreds of copies of the loop when cost multiplier is disabled:
+;
+; RUN: opt < %s -enable-nontrivial-unswitch -enable-unswitch-cost-multiplier=false \
+; RUN:     -passes='loop(unswitch),print<loops>' -disable-output 2>&1 | \
+; RUN:     sort -b | FileCheck %s --check-prefixes=LOOP-MAX
+;
+
+; Single loop nest, not unswitched
+; LOOP1:     Loop at depth 1 containing:
+; LOOP1-NOT: Loop at depth 1 containing:
+; LOOP1:     Loop at depth 2 containing:
+; LOOP1-NOT: Loop at depth 2 containing:
+;
+; Somewhat relaxed restrictions on candidates:
+; LOOP-RELAX-COUNT-5:     Loop at depth 1 containing:
+; LOOP-RELAX-NOT: Loop at depth 1 containing:
+; LOOP-RELAX-COUNT-32:     Loop at depth 2 containing:
+; LOOP-RELAX-NOT: Loop at depth 2 containing:
+;
+; Even more relaxed restrictions on candidates and siblings.
+; LOOP-RELAX2-COUNT-11:     Loop at depth 1 containing:
+; LOOP-RELAX2-NOT: Loop at depth 1 containing:
+; LOOP-RELAX2-COUNT-40:     Loop at depth 2 containing:
+; LOOP-RELAX-NOT: Loop at depth 2 containing:
+;
+; Unswitched as much as it could (with multiplier disabled).
+; LOOP-MAX-COUNT-56:     Loop at depth 1 containing:
+; LOOP-MAX-NOT: Loop at depth 1 containing:
+; LOOP-MAX-COUNT-111:     Loop at depth 2 containing:
+; LOOP-MAX-NOT: Loop at depth 2 containing:
+
+define i32 @loop_switch(i32* %addr, i32 %c1, i32 %c2) {
+entry:
+  %addr1 = getelementptr i32, i32* %addr, i64 0
+  %addr2 = getelementptr i32, i32* %addr, i64 1
+  %check0 = icmp eq i32 %c2, 0
+  %check1 = icmp eq i32 %c2, 31
+  %check2 = icmp eq i32 %c2, 32
+  %check3 = icmp eq i32 %c2, 33
+  %check4 = icmp eq i32 %c2, 34
+  br label %outer_loop
+
+outer_loop:
+  %iv1 = phi i32 [0, %entry], [%iv1.next, %outer_latch]
+  %iv1.next = add i32 %iv1, 1
+  br label %inner_loop
+inner_loop:
+  %iv2 = phi i32 [0, %outer_loop], [%iv2.next, %inner_latch]
+  %iv2.next = add i32 %iv2, 1
+  switch i32 %c1, label %inner_latch [
+    i32 0, label %case0
+    i32 1, label %case1
+    i32 2, label %case2
+    i32 3, label %case3
+    i32 4, label %case4
+  ]
+
+case4:
+  br i1 %check4, label %exit, label %inner_latch
+case3:
+  br i1 %check3, label %exit, label %inner_latch
+case2:
+  br i1 %check2, label %exit, label %inner_latch
+case1:
+  br i1 %check1, label %exit, label %inner_latch
+case0:
+  br i1 %check0, label %exit, label %inner_latch
+
+inner_latch:
+  store volatile i32 0, i32* %addr1
+  %test_inner = icmp slt i32 %iv2, 50
+  br i1 %test_inner, label %inner_loop, label %outer_latch
+
+outer_latch:
+  store volatile i32 0, i32* %addr2
+  %test_outer = icmp slt i32 %iv1, 50
+  br i1 %test_outer, label %outer_loop, label %exit
+
+exit:                                            ; preds = %bci_0
+  ret i32 1
+}
author	Fedor Sergeev <fedor.sergeev@azul.com>	2018-11-16 21:16:43 +0000
committer	Fedor Sergeev <fedor.sergeev@azul.com>	2018-11-16 21:16:43 +0000
commit	2e3e224e715eeb85ca9640e19ce14cd0bd4f3f6b (patch)
tree	1703008da9443ad1291a6f2fc8ed5928d3ad144d /llvm/test/Transforms/SimpleLoopUnswitch/exponential-switch-unswitch.ll
parent	d1840e5383a9751c534fd3715ab24a497bc54295 (diff)
download	bcm5719-llvm-2e3e224e715eeb85ca9640e19ce14cd0bd4f3f6b.tar.gz bcm5719-llvm-2e3e224e715eeb85ca9640e19ce14cd0bd4f3f6b.zip