summaryrefslogtreecommitdiffstats
diff options
context:
space:
mode:
-rw-r--r--llvm/include/llvm/Transforms/Scalar/LoopUnrollPass.h60
-rw-r--r--llvm/lib/Passes/PassBuilder.cpp2
-rw-r--r--llvm/lib/Passes/PassRegistry.def1
-rw-r--r--llvm/lib/Transforms/Scalar/LoopUnrollPass.cpp21
-rw-r--r--llvm/test/Transforms/LoopUnroll/peel-loop.ll2
-rw-r--r--llvm/test/Transforms/LoopUnroll/runtime-loop.ll34
6 files changed, 104 insertions, 16 deletions
diff --git a/llvm/include/llvm/Transforms/Scalar/LoopUnrollPass.h b/llvm/include/llvm/Transforms/Scalar/LoopUnrollPass.h
index 9848e0d54f2..20c9a26b98c 100644
--- a/llvm/include/llvm/Transforms/Scalar/LoopUnrollPass.h
+++ b/llvm/include/llvm/Transforms/Scalar/LoopUnrollPass.h
@@ -10,6 +10,7 @@
#ifndef LLVM_TRANSFORMS_SCALAR_LOOPUNROLLPASS_H
#define LLVM_TRANSFORMS_SCALAR_LOOPUNROLLPASS_H
+#include "llvm/ADT/Optional.h"
#include "llvm/Analysis/LoopAnalysisManager.h"
#include "llvm/IR/PassManager.h"
@@ -30,16 +31,71 @@ public:
LoopStandardAnalysisResults &AR, LPMUpdater &U);
};
+/// A set of parameters used to control various transforms performed by the
+/// LoopUnroll pass. Each of the boolean parameters can be set to:
+/// true - enabling the transformation.
+/// false - disabling the transformation.
+/// None - relying on a global default.
+///
+/// There is also OptLevel parameter, which is used for additional loop unroll
+/// tuning.
+///
+/// Intended use is to create a default object, modify parameters with
+/// additional setters and then pass it to LoopUnrollPass.
+///
+struct LoopUnrollOptions {
+ Optional<bool> AllowPartial;
+ Optional<bool> AllowPeeling;
+ Optional<bool> AllowRuntime;
+ Optional<bool> AllowUpperBound;
+ int OptLevel;
+
+ LoopUnrollOptions(int OptLevel = 2) : OptLevel(OptLevel) {}
+
+ /// Enables or disables partial unrolling. When disabled only full unrolling
+ /// is allowed.
+ LoopUnrollOptions &setPartial(bool Partial) {
+ AllowPartial = Partial;
+ return *this;
+ }
+
+ /// Enables or disables unrolling of loops with runtime trip count.
+ LoopUnrollOptions &setRuntime(bool Runtime) {
+ AllowRuntime = Runtime;
+ return *this;
+ }
+
+ /// Enables or disables loop peeling.
+ LoopUnrollOptions &setPeeling(bool Peeling) {
+ AllowPeeling = Peeling;
+ return *this;
+ }
+
+ /// Enables or disables the use of trip count upper bound
+ /// in loop unrolling.
+ LoopUnrollOptions &setUpperBound(bool UpperBound) {
+ AllowUpperBound = UpperBound;
+ return *this;
+ }
+
+ // Sets "optimization level" tuning parameter for loop unrolling.
+ LoopUnrollOptions &setOptLevel(int O) {
+ OptLevel = O;
+ return *this;
+ }
+};
+
/// Loop unroll pass that will support both full and partial unrolling.
/// It is a function pass to have access to function and module analyses.
/// It will also put loops into canonical form (simplified and LCSSA).
class LoopUnrollPass : public PassInfoMixin<LoopUnrollPass> {
- const int OptLevel;
+ LoopUnrollOptions UnrollOpts;
public:
/// This uses the target information (or flags) to control the thresholds for
/// different unrolling stategies but supports all of them.
- explicit LoopUnrollPass(int OptLevel = 2) : OptLevel(OptLevel) {}
+ explicit LoopUnrollPass(LoopUnrollOptions UnrollOpts = {})
+ : UnrollOpts(UnrollOpts) {}
PreservedAnalyses run(Function &F, FunctionAnalysisManager &AM);
};
diff --git a/llvm/lib/Passes/PassBuilder.cpp b/llvm/lib/Passes/PassBuilder.cpp
index c23c8c8d47a..0c6dfff06f1 100644
--- a/llvm/lib/Passes/PassBuilder.cpp
+++ b/llvm/lib/Passes/PassBuilder.cpp
@@ -830,7 +830,7 @@ PassBuilder::buildModuleOptimizationPipeline(OptimizationLevel Level,
OptimizePM.addPass(
createFunctionToLoopPassAdaptor(LoopUnrollAndJamPass(Level)));
}
- OptimizePM.addPass(LoopUnrollPass(Level));
+ OptimizePM.addPass(LoopUnrollPass(LoopUnrollOptions(Level)));
OptimizePM.addPass(InstCombinePass());
OptimizePM.addPass(RequireAnalysisPass<OptimizationRemarkEmitterAnalysis, Function>());
OptimizePM.addPass(createFunctionToLoopPassAdaptor(LICMPass(), DebugLogging));
diff --git a/llvm/lib/Passes/PassRegistry.def b/llvm/lib/Passes/PassRegistry.def
index 8de4541a772..99df2ad2719 100644
--- a/llvm/lib/Passes/PassRegistry.def
+++ b/llvm/lib/Passes/PassRegistry.def
@@ -215,6 +215,7 @@ FUNCTION_PASS("sroa", SROA())
FUNCTION_PASS("tailcallelim", TailCallElimPass())
FUNCTION_PASS("unreachableblockelim", UnreachableBlockElimPass())
FUNCTION_PASS("unroll", LoopUnrollPass())
+FUNCTION_PASS("unroll<peeling;no-runtime>",LoopUnrollPass(LoopUnrollOptions().setPeeling(true).setRuntime(false)))
FUNCTION_PASS("verify", VerifierPass())
FUNCTION_PASS("verify<domtree>", DominatorTreeVerifierPass())
FUNCTION_PASS("verify<loops>", LoopVerifierPass())
diff --git a/llvm/lib/Transforms/Scalar/LoopUnrollPass.cpp b/llvm/lib/Transforms/Scalar/LoopUnrollPass.cpp
index 34d2b2a8b27..d10dae124a7 100644
--- a/llvm/lib/Transforms/Scalar/LoopUnrollPass.cpp
+++ b/llvm/lib/Transforms/Scalar/LoopUnrollPass.cpp
@@ -1333,23 +1333,20 @@ PreservedAnalyses LoopUnrollPass::run(Function &F,
Loop *ParentL = L.getParentLoop();
#endif
- // The API here is quite complex to call, but there are only two interesting
- // states we support: partial and full (or "simple") unrolling. However, to
- // enable these things we actually pass "None" in for the optional to avoid
- // providing an explicit choice.
- Optional<bool> AllowPartialParam, RuntimeParam, UpperBoundParam,
- AllowPeeling;
// Check if the profile summary indicates that the profiled application
// has a huge working set size, in which case we disable peeling to avoid
// bloating it further.
+ Optional<bool> LocalAllowPeeling = UnrollOpts.AllowPeeling;
if (PSI && PSI->hasHugeWorkingSetSize())
- AllowPeeling = false;
+ LocalAllowPeeling = false;
std::string LoopName = L.getName();
- LoopUnrollResult Result =
- tryToUnrollLoop(&L, DT, &LI, SE, TTI, AC, ORE,
- /*PreserveLCSSA*/ true, OptLevel, /*Count*/ None,
- /*Threshold*/ None, AllowPartialParam, RuntimeParam,
- UpperBoundParam, AllowPeeling);
+ // The API here is quite complex to call and we allow to select some
+ // flavors of unrolling during construction time (by setting UnrollOpts).
+ LoopUnrollResult Result = tryToUnrollLoop(
+ &L, DT, &LI, SE, TTI, AC, ORE,
+ /*PreserveLCSSA*/ true, UnrollOpts.OptLevel, /*Count*/ None,
+ /*Threshold*/ None, UnrollOpts.AllowPartial, UnrollOpts.AllowRuntime,
+ UnrollOpts.AllowUpperBound, LocalAllowPeeling);
Changed |= Result != LoopUnrollResult::Unmodified;
// The parent must not be damaged by unrolling!
diff --git a/llvm/test/Transforms/LoopUnroll/peel-loop.ll b/llvm/test/Transforms/LoopUnroll/peel-loop.ll
index d535414b3eb..eb3d29cb494 100644
--- a/llvm/test/Transforms/LoopUnroll/peel-loop.ll
+++ b/llvm/test/Transforms/LoopUnroll/peel-loop.ll
@@ -1,4 +1,6 @@
; RUN: opt < %s -S -loop-unroll -unroll-force-peel-count=3 -verify-dom-info -simplifycfg -instcombine | FileCheck %s
+; RUN: opt < %s -S -passes='require<opt-remark-emit>,unroll,simplify-cfg,instcombine' -unroll-force-peel-count=3 -verify-dom-info | FileCheck %s
+; RUN: opt < %s -S -passes='require<opt-remark-emit>,unroll<peeling;no-runtime>,simplify-cfg,instcombine' -unroll-force-peel-count=3 -verify-dom-info | FileCheck %s
; Basic loop peeling - check that we can peel-off the first 3 loop iterations
; when explicitly requested.
diff --git a/llvm/test/Transforms/LoopUnroll/runtime-loop.ll b/llvm/test/Transforms/LoopUnroll/runtime-loop.ll
index 34eaa4ec333..19072855d25 100644
--- a/llvm/test/Transforms/LoopUnroll/runtime-loop.ll
+++ b/llvm/test/Transforms/LoopUnroll/runtime-loop.ll
@@ -1,8 +1,16 @@
; RUN: opt < %s -S -loop-unroll -unroll-runtime=true -unroll-runtime-epilog=true | FileCheck %s -check-prefixes=EPILOG,COMMON
; RUN: opt < %s -S -loop-unroll -unroll-runtime=true -unroll-runtime-epilog=false | FileCheck %s -check-prefixes=PROLOG,COMMON
-
+;
; RUN: opt < %s -S -passes='require<opt-remark-emit>,unroll' -unroll-runtime=true -unroll-runtime-epilog=true | FileCheck %s -check-prefixes=EPILOG,COMMON
; RUN: opt < %s -S -passes='require<opt-remark-emit>,unroll' -unroll-runtime=true -unroll-runtime-epilog=false | FileCheck %s -check-prefixes=PROLOG,COMMON
+;
+; Restricted versions of unroll (unroll<peeling;noruntime>, unroll-full) should not be doing runtime unrolling
+; even if it is globally enabled through -unroll-runtime option
+;
+; RUN: opt < %s -S -passes='require<opt-remark-emit>,unroll<peeling;no-runtime>' -unroll-runtime=true -unroll-runtime-epilog=true | FileCheck %s -check-prefixes=NOEPILOG,COMMON
+; RUN: opt < %s -S -passes='require<opt-remark-emit>,unroll<peeling;no-runtime>' -unroll-runtime=true -unroll-runtime-epilog=false | FileCheck %s -check-prefixes=NOPROLOG,COMMON
+; RUN: opt < %s -S -passes='require<opt-remark-emit>,loop(unroll-full)' -unroll-runtime=true -unroll-runtime-epilog=true | FileCheck %s -check-prefixes=NOEPILOG,COMMON
+; RUN: opt < %s -S -passes='require<opt-remark-emit>,loop(unroll-full)' -unroll-runtime=true -unroll-runtime-epilog=false | FileCheck %s -check-prefixes=NOPROLOG,COMMON
target datalayout = "e-p:64:64:64-i1:8:8-i8:8:8-i16:16:16-i32:32:32-i64:64:64-f32:32:32-f64:64:64-v64:64:64-v128:128:128-a0:0:64-s0:64:64-f80:128:128-n8:16:32:64-S128"
@@ -14,22 +22,32 @@ target datalayout = "e-p:64:64:64-i1:8:8-i8:8:8-i16:16:16-i32:32:32-i64:64:64-f3
; EPILOG: %lcmp.mod = icmp ne i32 %xtraiter, 0
; EPILOG: br i1 %lcmp.mod, label %for.body.epil.preheader, label %for.end.loopexit
+; NOEPILOG-NOT: %xtraiter = and i32 %n
+
; PROLOG: %xtraiter = and i32 %n
; PROLOG: %lcmp.mod = icmp ne i32 %xtraiter, 0
; PROLOG: br i1 %lcmp.mod, label %for.body.prol.preheader, label %for.body.prol.loopexit
+; NOPROLOG-NOT: %xtraiter = and i32 %n
+
; EPILOG: for.body.epil:
; EPILOG: %indvars.iv.epil = phi i64 [ %indvars.iv.next.epil, %for.body.epil ], [ %indvars.iv.unr, %for.body.epil.preheader ]
; EPILOG: %epil.iter.sub = sub i32 %epil.iter, 1
; EPILOG: %epil.iter.cmp = icmp ne i32 %epil.iter.sub, 0
; EPILOG: br i1 %epil.iter.cmp, label %for.body.epil, label %for.end.loopexit.epilog-lcssa, !llvm.loop !0
+; NOEPILOG: for.body:
+; NOEPILOG-NOT: for.body.epil:
+
; PROLOG: for.body.prol:
; PROLOG: %indvars.iv.prol = phi i64 [ %indvars.iv.next.prol, %for.body.prol ], [ 0, %for.body.prol.preheader ]
; PROLOG: %prol.iter.sub = sub i32 %prol.iter, 1
; PROLOG: %prol.iter.cmp = icmp ne i32 %prol.iter.sub, 0
; PROLOG: br i1 %prol.iter.cmp, label %for.body.prol, label %for.body.prol.loopexit.unr-lcssa, !llvm.loop !0
+; NOPROLOG: for.body:
+; NOPROLOG-NOT: for.body.prol:
+
define i32 @test(i32* nocapture %a, i32 %n) nounwind uwtable readonly {
entry:
@@ -86,6 +104,8 @@ for.end: ; preds = %for.body
; COMMON-LABEL: @foo(
; EPILOG: bb72.2:
; PROLOG: bb72.2:
+; NOEPILOG-NOT: bb72.2:
+; NOPROLOG-NOT: bb72.2:
define void @foo(i32 %trips) {
entry:
@@ -111,9 +131,15 @@ cond_true138:
; EPILOG: for.body.epil:
; EPILOG: br i1 %epil.iter.cmp, label %for.body.epil, label %for.cond.for.end_crit_edge.epilog-lcssa
+; NOEPILOG: for.body:
+; NOEPILOG-NOT: for.body.epil:
+
; PROLOG: for.body.prol:
; PROLOG: br i1 %prol.iter.cmp, label %for.body.prol, label %for.body.prol.loopexit
+; NOPROLOG: for.body:
+; NOPROLOG-NOT: for.body.prol:
+
define zeroext i16 @down(i16* nocapture %p, i32 %len) nounwind uwtable readonly {
entry:
%cmp2 = icmp eq i32 %len, 0
@@ -146,9 +172,15 @@ for.end: ; preds = %for.cond.for.end_cr
; EPILOG: for.body:
; EPILOG-NOT: for.body.epil:
+; NOEPILOG: for.body:
+; NOEPILOG-NOT: for.body.epil:
+
; PROLOG: for.body:
; PROLOG-NOT: for.body.prol:
+; NOPROLOG: for.body:
+; NOPROLOG-NOT: for.body.prol:
+
define zeroext i16 @test2(i16* nocapture %p, i32 %len) nounwind uwtable readonly {
entry:
%cmp2 = icmp eq i32 %len, 0
OpenPOWER on IntegriCloud