summaryrefslogtreecommitdiffstats
path: root/llvm/test/Transforms/LoopUnroll
diff options
context:
space:
mode:
authorAndrei Elovikov <andrei.elovikov@intel.com>2018-03-15 09:59:15 +0000
committerAndrei Elovikov <andrei.elovikov@intel.com>2018-03-15 09:59:15 +0000
commitf9b8035f3c9b60763bfddf317123101a652d9e5f (patch)
treea1ae4e8d6ece90b80d159845a3f142fbace2d2d9 /llvm/test/Transforms/LoopUnroll
parent211e94d6666d5e1bb5c4e622d6054270927b0ebe (diff)
downloadbcm5719-llvm-f9b8035f3c9b60763bfddf317123101a652d9e5f.tar.gz
bcm5719-llvm-f9b8035f3c9b60763bfddf317123101a652d9e5f.zip
[LoopUnroll] Ignore ephemeral values when checking full unroll profitability.
Summary: Before this patch, the call graph in the LoopUnrollPass looks like this: tryToUnrollLoop ApproximateLoopSize collectEphemeralValues /* Use collected ephemeral values */ computeUnrollCount analyzeLoopUnrollCost /* Bail out from the analysis if loop contains CallInst */ This patch moves the collection of the ephemeral values to the tryToUnrollLoop function, passes the collected values into ApproximateLoopSize (as before), and additionally starts using them in analyzeLoopUnrollCost: tryToUnrollLoop collectEphemeralValues ApproximateLoopSize(EphValues) /* Use EphValues */ computeUnrollCount(EphValues) analyzeLoopUnrollCost(EphValues) /* Ignore ephemeral values - they don't contribute to the final cost */ /* Bail out from the analysis if loop contains CallInst */ Reviewers: mzolotukhin, evstupac, sanjoy Reviewed By: evstupac Subscribers: llvm-commits Differential Revision: https://reviews.llvm.org/D43931 llvm-svn: 327617
Diffstat (limited to 'llvm/test/Transforms/LoopUnroll')
-rw-r--r--llvm/test/Transforms/LoopUnroll/complete_unroll_profitability_with_assume.ll119
1 files changed, 119 insertions, 0 deletions
diff --git a/llvm/test/Transforms/LoopUnroll/complete_unroll_profitability_with_assume.ll b/llvm/test/Transforms/LoopUnroll/complete_unroll_profitability_with_assume.ll
new file mode 100644
index 00000000000..6ecac9625e2
--- /dev/null
+++ b/llvm/test/Transforms/LoopUnroll/complete_unroll_profitability_with_assume.ll
@@ -0,0 +1,119 @@
+; NOTE: Assertions have been autogenerated by utils/update_test_checks.py
+; RUN: opt -S < %s -loop-unroll -unroll-threshold=42 | FileCheck %s --check-prefix=ANALYZE-FULL
+
+; This test is supposed to check that calls to @llvm.assume builtin are not
+; prohibiting the analysis of full unroll profitability in case the cost of the
+; unrolled loop (not accounting for any simplifications done by such unrolling) is
+; higher than some threshold.
+;
+; Ensure that we indeed are testing this code path by verifying that the loop is
+; not unrolled without such analysis:
+
+; RUN: opt -S < %s -loop-unroll -unroll-threshold=42 -unroll-max-iteration-count-to-analyze=2 \
+; RUN: | FileCheck %s --check-prefix=DONT-ANALYZE-FULL
+
+; Function Attrs: nounwind
+declare void @llvm.assume(i1) #1
+
+define i32 @foo(i32* %a) { ; counted loop containing an llvm.assume call; stores a[i] = i while i <= 3
+; ANALYZE-FULL-LABEL: @foo(
+; ANALYZE-FULL-NEXT: entry:
+; ANALYZE-FULL-NEXT: br label [[FOR_BODY:%.*]]
+; ANALYZE-FULL: for.body:
+; ANALYZE-FULL-NEXT: br i1 true, label [[DO_STORE:%.*]], label [[FOR_NEXT:%.*]]
+; ANALYZE-FULL: do_store:
+; ANALYZE-FULL-NEXT: store i32 0, i32* [[A:%.*]]
+; ANALYZE-FULL-NEXT: br label [[FOR_NEXT]]
+; ANALYZE-FULL: for.next:
+; ANALYZE-FULL-NEXT: br i1 true, label [[DO_STORE_1:%.*]], label [[FOR_NEXT_1:%.*]]
+; ANALYZE-FULL: do_store.1:
+; ANALYZE-FULL-NEXT: [[GEP_1:%.*]] = getelementptr i32, i32* [[A]], i32 1
+; ANALYZE-FULL-NEXT: store i32 1, i32* [[GEP_1]]
+; ANALYZE-FULL-NEXT: br label [[FOR_NEXT_1]]
+; ANALYZE-FULL: for.next.1:
+; ANALYZE-FULL-NEXT: br i1 true, label [[DO_STORE_2:%.*]], label [[FOR_NEXT_2:%.*]]
+; ANALYZE-FULL: do_store.2:
+; ANALYZE-FULL-NEXT: [[GEP_2:%.*]] = getelementptr i32, i32* [[A]], i32 2
+; ANALYZE-FULL-NEXT: store i32 2, i32* [[GEP_2]]
+; ANALYZE-FULL-NEXT: br label [[FOR_NEXT_2]]
+; ANALYZE-FULL: for.next.2:
+; ANALYZE-FULL-NEXT: br i1 true, label [[DO_STORE_3:%.*]], label [[FOR_NEXT_3:%.*]]
+; ANALYZE-FULL: do_store.3:
+; ANALYZE-FULL-NEXT: [[GEP_3:%.*]] = getelementptr i32, i32* [[A]], i32 3
+; ANALYZE-FULL-NEXT: store i32 3, i32* [[GEP_3]]
+; ANALYZE-FULL-NEXT: br label [[FOR_NEXT_3]]
+; ANALYZE-FULL: for.next.3:
+; ANALYZE-FULL-NEXT: br i1 false, label [[DO_STORE_4:%.*]], label [[FOR_NEXT_4:%.*]]
+; ANALYZE-FULL: do_store.4:
+; ANALYZE-FULL-NEXT: [[GEP_4:%.*]] = getelementptr i32, i32* [[A]], i32 4
+; ANALYZE-FULL-NEXT: store i32 4, i32* [[GEP_4]]
+; ANALYZE-FULL-NEXT: br label [[FOR_NEXT_4]]
+; ANALYZE-FULL: for.next.4:
+; ANALYZE-FULL-NEXT: br i1 false, label [[DO_STORE_5:%.*]], label [[FOR_NEXT_5:%.*]]
+; ANALYZE-FULL: do_store.5:
+; ANALYZE-FULL-NEXT: [[GEP_5:%.*]] = getelementptr i32, i32* [[A]], i32 5
+; ANALYZE-FULL-NEXT: store i32 5, i32* [[GEP_5]]
+; ANALYZE-FULL-NEXT: br label [[FOR_NEXT_5]]
+; ANALYZE-FULL: for.next.5:
+; ANALYZE-FULL-NEXT: br i1 false, label [[DO_STORE_6:%.*]], label [[FOR_NEXT_6:%.*]]
+; ANALYZE-FULL: do_store.6:
+; ANALYZE-FULL-NEXT: [[GEP_6:%.*]] = getelementptr i32, i32* [[A]], i32 6
+; ANALYZE-FULL-NEXT: store i32 6, i32* [[GEP_6]]
+; ANALYZE-FULL-NEXT: br label [[FOR_NEXT_6]]
+; ANALYZE-FULL: for.next.6:
+; ANALYZE-FULL-NEXT: br i1 false, label [[DO_STORE_7:%.*]], label [[FOR_NEXT_7:%.*]]
+; ANALYZE-FULL: do_store.7:
+; ANALYZE-FULL-NEXT: [[GEP_7:%.*]] = getelementptr i32, i32* [[A]], i32 7
+; ANALYZE-FULL-NEXT: store i32 7, i32* [[GEP_7]]
+; ANALYZE-FULL-NEXT: br label [[FOR_NEXT_7]]
+; ANALYZE-FULL: for.next.7:
+; ANALYZE-FULL-NEXT: br i1 false, label [[DO_STORE_8:%.*]], label [[FOR_NEXT_8:%.*]]
+; ANALYZE-FULL: do_store.8:
+; ANALYZE-FULL-NEXT: [[GEP_8:%.*]] = getelementptr i32, i32* [[A]], i32 8
+; ANALYZE-FULL-NEXT: store i32 8, i32* [[GEP_8]]
+; ANALYZE-FULL-NEXT: br label [[FOR_NEXT_8]]
+; ANALYZE-FULL: for.next.8:
+; ANALYZE-FULL-NEXT: ret i32 9
+;
+; DONT-ANALYZE-FULL-LABEL: @foo(
+; DONT-ANALYZE-FULL-NEXT: entry:
+; DONT-ANALYZE-FULL-NEXT: br label [[FOR_BODY:%.*]]
+; DONT-ANALYZE-FULL: for.body:
+; DONT-ANALYZE-FULL-NEXT: [[INDVAR:%.*]] = phi i32 [ 0, [[ENTRY:%.*]] ], [ [[INDVAR_NEXT:%.*]], [[FOR_NEXT:%.*]] ]
+; DONT-ANALYZE-FULL-NEXT: [[INDVAR_NEXT]] = add i32 [[INDVAR]], 1
+; DONT-ANALYZE-FULL-NEXT: [[CMP:%.*]] = icmp ule i32 [[INDVAR]], 20
+; DONT-ANALYZE-FULL-NEXT: tail call void @llvm.assume(i1 [[CMP]])
+; DONT-ANALYZE-FULL-NEXT: [[CMP2:%.*]] = icmp ule i32 [[INDVAR]], 3
+; DONT-ANALYZE-FULL-NEXT: br i1 [[CMP2]], label [[DO_STORE:%.*]], label [[FOR_NEXT]]
+; DONT-ANALYZE-FULL: do_store:
+; DONT-ANALYZE-FULL-NEXT: [[GEP:%.*]] = getelementptr i32, i32* [[A:%.*]], i32 [[INDVAR]]
+; DONT-ANALYZE-FULL-NEXT: store i32 [[INDVAR]], i32* [[GEP]]
+; DONT-ANALYZE-FULL-NEXT: br label [[FOR_NEXT]]
+; DONT-ANALYZE-FULL: for.next:
+; DONT-ANALYZE-FULL-NEXT: [[EXITCOND:%.*]] = icmp ne i32 [[INDVAR_NEXT]], 9
+; DONT-ANALYZE-FULL-NEXT: br i1 [[EXITCOND]], label [[FOR_BODY]], label [[LOOPEXIT:%.*]]
+; DONT-ANALYZE-FULL: loopexit:
+; DONT-ANALYZE-FULL-NEXT: [[INDVAR_NEXT_LCSSA:%.*]] = phi i32 [ [[INDVAR_NEXT]], [[FOR_NEXT]] ]
+; DONT-ANALYZE-FULL-NEXT: ret i32 [[INDVAR_NEXT_LCSSA]]
+;
+entry:
+ br label %for.body
+for.body:
+ %indvar = phi i32 [ 0, %entry ], [ %indvar.next, %for.next ] ; loop counter: 0 on entry, %indvar.next on the back edge
+ %indvar.next = add i32 %indvar, 1
+ %cmp = icmp ule i32 %indvar, 20 ; consumed only by the llvm.assume call below
+ tail call void @llvm.assume(i1 %cmp) ; the call that must not block the full-unroll cost analysis
+ %cmp2 = icmp ule i32 %indvar, 3 ; true only for %indvar in 0..3, so only the first four iterations store
+ br i1 %cmp2, label %do_store, label %for.next
+
+do_store:
+ %gep = getelementptr i32, i32* %a, i32 %indvar ; address of element %indvar of %a
+ store i32 %indvar, i32* %gep ; a[%indvar] = %indvar
+ br label %for.next
+
+for.next:
+ %exitcond = icmp ne i32 %indvar.next, 9 ; becomes false once %indvar.next reaches 9, i.e. after nine iterations
+ br i1 %exitcond, label %for.body, label %loopexit
+loopexit:
+ ret i32 %indvar.next ; value of %indvar.next from the exiting iteration
+}
OpenPOWER on IntegriCloud