diff options
| author | Dehao Chen <dehao@google.com> | 2016-12-30 00:50:28 +0000 |
|---|---|---|
| committer | Dehao Chen <dehao@google.com> | 2016-12-30 00:50:28 +0000 |
| commit | cc76344ef5f2b5752759f92111efe00dd736a11e (patch) | |
| tree | d8e78eea33ff792ed68626205aa9a61afe3c810d /llvm/test/Transforms/LoopUnroll | |
| parent | 30a9b6bb4e7a2c95ea4935c2c6b9652fad1fd27b (diff) | |
| download | bcm5719-llvm-cc76344ef5f2b5752759f92111efe00dd736a11e.tar.gz bcm5719-llvm-cc76344ef5f2b5752759f92111efe00dd736a11e.zip | |
Use continuous boosting factor for complete unroll.
Summary:
The current loop complete unroll algorithm checks if unrolling complete will reduce the runtime by a certain percentage. If yes, it will apply a fixed boosting factor to the threshold (by discounting cost). The problem for this approach is that the threshold abruptly. This patch makes the boosting factor a function of runtime reduction percentage, capped by a fixed threshold. In this way, the threshold changes continuously.
The patch also simplified the code by reducing one parameter in UP.
The patch only affects code-gen of two speccpu2006 benchmark:
445.gobmk binary size decreases 0.08%, no performance change.
464.h264ref binary size increases 0.24%, no performance change.
Reviewers: mzolotukhin, chandlerc
Subscribers: llvm-commits
Differential Revision: https://reviews.llvm.org/D26989
llvm-svn: 290737
Diffstat (limited to 'llvm/test/Transforms/LoopUnroll')
9 files changed, 15 insertions, 18 deletions
diff --git a/llvm/test/Transforms/LoopUnroll/full-unroll-crashers.ll b/llvm/test/Transforms/LoopUnroll/full-unroll-crashers.ll index f5c7734de96..9f1529139de 100644 --- a/llvm/test/Transforms/LoopUnroll/full-unroll-crashers.ll +++ b/llvm/test/Transforms/LoopUnroll/full-unroll-crashers.ll @@ -1,5 +1,5 @@ ; Check that we don't crash on corner cases. -; RUN: opt < %s -S -loop-unroll -unroll-max-iteration-count-to-analyze=1000 -unroll-threshold=1 -unroll-percent-dynamic-cost-saved-threshold=20 -o /dev/null +; RUN: opt < %s -S -loop-unroll -unroll-max-iteration-count-to-analyze=1000 -unroll-threshold=1 -unroll-max-percent-threshold-boost=200 -o /dev/null target datalayout = "e-m:o-i64:64-f80:128-n8:16:32:64-S128" @known_constant = internal unnamed_addr constant [10 x i32] [i32 1, i32 1, i32 1, i32 1, i32 1, i32 1, i32 1, i32 1, i32 1, i32 1], align 16 diff --git a/llvm/test/Transforms/LoopUnroll/full-unroll-heuristics-2.ll b/llvm/test/Transforms/LoopUnroll/full-unroll-heuristics-2.ll index 11c9f9635ca..26124fb32ca 100644 --- a/llvm/test/Transforms/LoopUnroll/full-unroll-heuristics-2.ll +++ b/llvm/test/Transforms/LoopUnroll/full-unroll-heuristics-2.ll @@ -1,4 +1,4 @@ -; RUN: opt < %s -S -loop-unroll -unroll-max-iteration-count-to-analyze=1000 -unroll-threshold=10 -unroll-percent-dynamic-cost-saved-threshold=70 -unroll-dynamic-cost-savings-discount=90 | FileCheck %s +; RUN: opt < %s -S -loop-unroll -unroll-max-iteration-count-to-analyze=1000 -unroll-threshold=10 -unroll-max-percent-threshold-boost=200 | FileCheck %s target datalayout = "e-m:o-i64:64-f80:128-n8:16:32:64-S128" @unknown_global = internal unnamed_addr global [9 x i32] [i32 0, i32 -1, i32 0, i32 -1, i32 5, i32 -1, i32 0, i32 -1, i32 0], align 16 diff --git a/llvm/test/Transforms/LoopUnroll/full-unroll-heuristics-cmp.ll b/llvm/test/Transforms/LoopUnroll/full-unroll-heuristics-cmp.ll index 12ad4f26bf8..8bddb1b225c 100644 --- a/llvm/test/Transforms/LoopUnroll/full-unroll-heuristics-cmp.ll +++ b/llvm/test/Transforms/LoopUnroll/full-unroll-heuristics-cmp.ll @@ -1,4 +1,4 @@ -; RUN: opt < %s -S -loop-unroll -unroll-max-iteration-count-to-analyze=100 -unroll-dynamic-cost-savings-discount=1000 -unroll-threshold=10 -unroll-percent-dynamic-cost-saved-threshold=40 | FileCheck %s +; RUN: opt < %s -S -loop-unroll -unroll-max-iteration-count-to-analyze=100 -unroll-threshold=10 -unroll-max-percent-threshold-boost=200 | FileCheck %s target datalayout = "e-m:o-i64:64-f80:128-n8:16:32:64-S128" @known_constant = internal unnamed_addr constant [10 x i32] [i32 1, i32 1, i32 1, i32 1, i32 1, i32 1, i32 1, i32 1, i32 1, i32 1], align 16 diff --git a/llvm/test/Transforms/LoopUnroll/full-unroll-heuristics-dce.ll b/llvm/test/Transforms/LoopUnroll/full-unroll-heuristics-dce.ll index 6ee73b6fe4f..83c105ca23f 100644 --- a/llvm/test/Transforms/LoopUnroll/full-unroll-heuristics-dce.ll +++ b/llvm/test/Transforms/LoopUnroll/full-unroll-heuristics-dce.ll @@ -1,4 +1,4 @@ -; RUN: opt < %s -S -loop-unroll -unroll-max-iteration-count-to-analyze=100 -unroll-dynamic-cost-savings-discount=1000 -unroll-threshold=10 -unroll-percent-dynamic-cost-saved-threshold=60 | FileCheck %s +; RUN: opt < %s -S -loop-unroll -unroll-max-iteration-count-to-analyze=100 -unroll-threshold=12 -unroll-max-percent-threshold-boost=400 | FileCheck %s target datalayout = "e-m:o-i64:64-f80:128-n8:16:32:64-S128" @known_constant = internal unnamed_addr constant [10 x i32] [i32 0, i32 0, i32 0, i32 0, i32 1, i32 0, i32 0, i32 0, i32 0, i32 0], align 16 diff --git a/llvm/test/Transforms/LoopUnroll/full-unroll-heuristics-geps.ll b/llvm/test/Transforms/LoopUnroll/full-unroll-heuristics-geps.ll index 723a384ea2d..230912538d2 100644 --- a/llvm/test/Transforms/LoopUnroll/full-unroll-heuristics-geps.ll +++ b/llvm/test/Transforms/LoopUnroll/full-unroll-heuristics-geps.ll @@ -1,4 +1,4 @@ -; RUN: opt < %s -S -loop-unroll -unroll-max-iteration-count-to-analyze=100 -unroll-dynamic-cost-savings-discount=1000 -unroll-threshold=10 -unroll-percent-dynamic-cost-saved-threshold=60 | FileCheck %s +; RUN: opt < %s -S -loop-unroll -unroll-max-iteration-count-to-analyze=100 -unroll-threshold=10 -unroll-max-percent-threshold-boost=200 | FileCheck %s target datalayout = "e-m:o-i64:64-f80:128-n8:16:32:64-S128" ; When examining gep-instructions we shouldn't consider them simplified if the diff --git a/llvm/test/Transforms/LoopUnroll/full-unroll-heuristics-phi-prop.ll b/llvm/test/Transforms/LoopUnroll/full-unroll-heuristics-phi-prop.ll index dd8582e6877..a1fab3cc71e 100644 --- a/llvm/test/Transforms/LoopUnroll/full-unroll-heuristics-phi-prop.ll +++ b/llvm/test/Transforms/LoopUnroll/full-unroll-heuristics-phi-prop.ll @@ -1,4 +1,4 @@ -; RUN: opt < %s -S -loop-unroll -unroll-max-iteration-count-to-analyze=100 -unroll-dynamic-cost-savings-discount=1000 -unroll-threshold=10 -unroll-percent-dynamic-cost-saved-threshold=50 | FileCheck %s +; RUN: opt < %s -S -loop-unroll -unroll-max-iteration-count-to-analyze=100 -unroll-threshold=10 -unroll-max-percent-threshold-boost=200 | FileCheck %s target datalayout = "e-m:o-i64:64-f80:128-n8:16:32:64-S128" define i64 @propagate_loop_phis() { diff --git a/llvm/test/Transforms/LoopUnroll/full-unroll-heuristics.ll b/llvm/test/Transforms/LoopUnroll/full-unroll-heuristics.ll index 904a65a1bc0..7189fbb3483 100644 --- a/llvm/test/Transforms/LoopUnroll/full-unroll-heuristics.ll +++ b/llvm/test/Transforms/LoopUnroll/full-unroll-heuristics.ll @@ -17,21 +17,18 @@ ; optimizations to remove ~55% of the instructions, the loop body size is 9, ; and unrolled size is 65. -; RUN: opt < %s -S -loop-unroll -unroll-max-iteration-count-to-analyze=1000 -unroll-threshold=10 -unroll-percent-dynamic-cost-saved-threshold=20 -unroll-dynamic-cost-savings-discount=0 | FileCheck %s -check-prefix=TEST1 -; RUN: opt < %s -S -loop-unroll -unroll-max-iteration-count-to-analyze=1000 -unroll-threshold=10 -unroll-percent-dynamic-cost-saved-threshold=20 -unroll-dynamic-cost-savings-discount=90 | FileCheck %s -check-prefix=TEST2 -; RUN: opt < %s -S -loop-unroll -unroll-max-iteration-count-to-analyze=1000 -unroll-threshold=10 -unroll-percent-dynamic-cost-saved-threshold=80 -unroll-dynamic-cost-savings-discount=90 | FileCheck %s -check-prefix=TEST3 -; RUN: opt < %s -S -loop-unroll -unroll-max-iteration-count-to-analyze=1000 -unroll-threshold=100 -unroll-percent-dynamic-cost-saved-threshold=80 -unroll-dynamic-cost-savings-discount=0 | FileCheck %s -check-prefix=TEST4 +; RUN: opt < %s -S -loop-unroll -unroll-max-iteration-count-to-analyze=1000 -unroll-threshold=10 -unroll-max-percent-threshold-boost=100 | FileCheck %s -check-prefix=TEST1 +; RUN: opt < %s -S -loop-unroll -unroll-max-iteration-count-to-analyze=1000 -unroll-threshold=20 -unroll-max-percent-threshold-boost=200 | FileCheck %s -check-prefix=TEST2 +; RUN: opt < %s -S -loop-unroll -unroll-max-iteration-count-to-analyze=1000 -unroll-threshold=20 -unroll-max-percent-threshold-boost=100 | FileCheck %s -check-prefix=TEST3 -; If the absolute threshold is too low, or if we can't optimize away requested -; percent of instructions, we shouldn't unroll: +; If the absolute threshold is too low, we should not unroll: ; TEST1: %array_const_idx = getelementptr inbounds [9 x i32], [9 x i32]* @known_constant, i64 0, i64 %iv -; TEST3: %array_const_idx = getelementptr inbounds [9 x i32], [9 x i32]* @known_constant, i64 0, i64 %iv ; Otherwise, we should: ; TEST2-NOT: %array_const_idx = getelementptr inbounds [9 x i32], [9 x i32]* @known_constant, i64 0, i64 %iv -; Also, we should unroll if the 'unroll-threshold' is big enough: -; TEST4-NOT: %array_const_idx = getelementptr inbounds [9 x i32], [9 x i32]* @known_constant, i64 0, i64 %iv +; If we do not boost threshold, the unroll will not happen: +; TEST3: %array_const_idx = getelementptr inbounds [9 x i32], [9 x i32]* @known_constant, i64 0, i64 %iv ; And check that we don't crash when we're not allowed to do any analysis. ; RUN: opt < %s -loop-unroll -unroll-max-iteration-count-to-analyze=0 -disable-output diff --git a/llvm/test/Transforms/LoopUnroll/partial-unroll-const-bounds.ll b/llvm/test/Transforms/LoopUnroll/partial-unroll-const-bounds.ll index a68a9ef2730..49c823a28c7 100644 --- a/llvm/test/Transforms/LoopUnroll/partial-unroll-const-bounds.ll +++ b/llvm/test/Transforms/LoopUnroll/partial-unroll-const-bounds.ll @@ -1,4 +1,4 @@ -; RUN: opt < %s -S -unroll-threshold=20 -loop-unroll -unroll-allow-partial -unroll-runtime -unroll-allow-remainder -unroll-dynamic-cost-savings-discount=0 | FileCheck %s +; RUN: opt < %s -S -unroll-threshold=20 -loop-unroll -unroll-allow-partial -unroll-runtime -unroll-allow-remainder -unroll-max-percent-threshold-boost=100 | FileCheck %s ; The Loop TripCount is 9. However unroll factors 3 or 9 exceed given threshold. ; The test checks that we choose a smaller, power-of-two, unroll count and do not give up on unrolling. @@ -21,7 +21,7 @@ for.body: ; preds = %for.body, %entry %st = trunc i64 %indvars.iv to i32 store i32 %st, i32* %arrayidx2, align 4 %indvars.iv.next = add nuw nsw i64 %indvars.iv, 1 - %exitcond = icmp eq i64 %indvars.iv.next, 10 + %exitcond = icmp eq i64 %indvars.iv.next, 20 br i1 %exitcond, label %for.end, label %for.body for.end: ; preds = %for.body diff --git a/llvm/test/Transforms/LoopUnroll/unroll-heuristics-pgo.ll b/llvm/test/Transforms/LoopUnroll/unroll-heuristics-pgo.ll index 57bac8560d4..f7add40b9d1 100644 --- a/llvm/test/Transforms/LoopUnroll/unroll-heuristics-pgo.ll +++ b/llvm/test/Transforms/LoopUnroll/unroll-heuristics-pgo.ll @@ -1,4 +1,4 @@ -; RUN: opt < %s -S -loop-unroll -unroll-runtime -unroll-threshold=40 -unroll-dynamic-cost-savings-discount=0 | FileCheck %s +; RUN: opt < %s -S -loop-unroll -unroll-runtime -unroll-threshold=40 -unroll-max-percent-threshold-boost=100 | FileCheck %s @known_constant = internal unnamed_addr constant [9 x i32] [i32 0, i32 -1, i32 0, i32 -1, i32 5, i32 -1, i32 0, i32 -1, i32 0], align 16 |

