summaryrefslogtreecommitdiffstats
diff options
context:
space:
mode:
-rw-r--r--polly/include/polly/ScopDetection.h14
-rw-r--r--polly/lib/Analysis/ScopDetection.cpp42
-rw-r--r--polly/test/ScopDetect/profitability-large-basic-blocks.ll83
3 files changed, 135 insertions, 4 deletions
diff --git a/polly/include/polly/ScopDetection.h b/polly/include/polly/ScopDetection.h
index e30826a9254..30486770eaf 100644
--- a/polly/include/polly/ScopDetection.h
+++ b/polly/include/polly/ScopDetection.h
@@ -287,6 +287,20 @@ private:
/// @return True if all blocks in R are valid, false otherwise.
bool allBlocksValid(DetectionContext &Context) const;
+ /// @brief Check if a region has sufficient compute instructions
+ ///
+ /// This function checks if a region has a non-trivial number of instructions
+ /// in each loop. This can be used as an indicator if a loop is worth
+ /// optimising.
+ ///
+ /// @param Context The context of scop detection.
+ /// @param NumLoops The number of loops in the region.
+ ///
+ /// @return True if region is has sufficient compute instructions,
+ /// false otherwise.
+ bool hasSufficientCompute(DetectionContext &Context,
+ int NumAffineLoops) const;
+
/// @brief Check if a region is profitable to optimize.
///
/// Regions that are unlikely to expose interesting optimization opportunities
diff --git a/polly/lib/Analysis/ScopDetection.cpp b/polly/lib/Analysis/ScopDetection.cpp
index b2e5228d2c6..1083507b744 100644
--- a/polly/lib/Analysis/ScopDetection.cpp
+++ b/polly/lib/Analysis/ScopDetection.cpp
@@ -71,6 +71,16 @@ using namespace polly;
#define DEBUG_TYPE "polly-detect"
+// This option is set to a very high value, as analyzing such loops increases
+// compile time on several cases. For experiments that enable this option,
+// a value of around 40 has been working to avoid run-time regressions with
+// Polly while still exposing interesting optimization opportunities.
+static cl::opt<int> ProfitabilityMinPerLoopInstructions(
+ "polly-detect-profitability-min-per-loop-insts",
+ cl::desc("The minimal number of per-loop instructions before a single loop "
+ "region is considered profitable"),
+ cl::Hidden, cl::ValueRequired, cl::init(100000000), cl::cat(PollyCategory));
+
bool polly::PollyProcessUnprofitable;
static cl::opt<bool, true> XPollyProcessUnprofitable(
"polly-process-unprofitable",
@@ -1134,6 +1144,19 @@ bool ScopDetection::allBlocksValid(DetectionContext &Context) const {
return true;
}
+bool ScopDetection::hasSufficientCompute(DetectionContext &Context,
+ int NumLoops) const {
+ int InstCount = 0;
+
+ for (auto *BB : Context.CurRegion.blocks())
+ if (Context.CurRegion.contains(LI->getLoopFor(BB)))
+ InstCount += std::distance(BB->begin(), BB->end());
+
+ InstCount = InstCount / NumLoops;
+
+ return InstCount >= ProfitabilityMinPerLoopInstructions;
+}
+
bool ScopDetection::isProfitableRegion(DetectionContext &Context) const {
Region &CurRegion = Context.CurRegion;
@@ -1145,13 +1168,24 @@ bool ScopDetection::isProfitableRegion(DetectionContext &Context) const {
if (!Context.hasStores || !Context.hasLoads)
return invalid<ReportUnprofitable>(Context, /*Assert=*/true, &CurRegion);
- // Check if there are sufficent non-overapproximated loops.
int NumLoops = countBeneficialLoops(&CurRegion);
int NumAffineLoops = NumLoops - Context.BoxedLoopsSet.size();
- if (NumAffineLoops < 2)
- return invalid<ReportUnprofitable>(Context, /*Assert=*/true, &CurRegion);
- return true;
+ // Scops with at least two loops may allow either loop fusion or tiling and
+ // are consequently interesting to look at.
+ if (NumAffineLoops >= 2)
+ return true;
+
+ // Scops that contain a loop with a non-trivial amount of computation per
+ // loop-iteration are interesting as we may be able to parallelize such
+ // loops. Individual loops that have only a small amount of computation
+ // per-iteration are performance-wise very fragile as any change to the
+ // loop induction variables may affect performance. To not cause spurious
+ // performance regressions, we do not consider such loops.
+ if (NumAffineLoops == 1 && hasSufficientCompute(Context, NumLoops))
+ return true;
+
+ return invalid<ReportUnprofitable>(Context, /*Assert=*/true, &CurRegion);
}
bool ScopDetection::isValidRegion(DetectionContext &Context) const {
diff --git a/polly/test/ScopDetect/profitability-large-basic-blocks.ll b/polly/test/ScopDetect/profitability-large-basic-blocks.ll
new file mode 100644
index 00000000000..268731e294d
--- /dev/null
+++ b/polly/test/ScopDetect/profitability-large-basic-blocks.ll
@@ -0,0 +1,83 @@
+; RUN: opt %loadPolly -polly-process-unprofitable=false \
+; RUN: -polly-detect-profitability-min-per-loop-insts=40 \
+; RUN: -polly-detect -analyze < %s | FileCheck %s -check-prefix=PROFITABLE
+
+; RUN: opt %loadPolly -polly-process-unprofitable=true \
+; RUN: -polly-detect -analyze < %s | FileCheck %s -check-prefix=PROFITABLE
+
+; RUN: opt %loadPolly -polly-process-unprofitable=false \
+; RUN: \
+; RUN: -polly-detect -analyze < %s | FileCheck %s -check-prefix=UNPROFITABLE
+
+; UNPROFITABLE-NOT: Valid Region for Scop:
+; PROFITABLE: Valid Region for Scop:
+
+; void foo(float *A, float *B, long N) {
+; for (long i = 0; i < 100; i++)
+; A[i] += .... / * a lot of compute */
+; }
+;
+target datalayout = "e-m:e-i64:64-f80:128-n8:16:32:64-S128"
+
+define void @foo(float* %A, float* %B, i64 %N) {
+entry:
+ br label %header
+
+header:
+ %i.0 = phi i64 [ 0, %entry ], [ %tmp10, %header ]
+ %tmp5 = sitofp i64 %i.0 to float
+ %tmp6 = getelementptr inbounds float, float* %A, i64 %i.0
+ %tmp7 = load float, float* %tmp6, align 4
+ %tmp8 = fadd float %tmp7, %tmp5
+ %val0 = fadd float %tmp7, 1.0
+ %val1 = fadd float %val0, 1.0
+ %val2 = fadd float %val1, 1.0
+ %val3 = fadd float %val2, 1.0
+ %val4 = fadd float %val3, 1.0
+ %val5 = fadd float %val4, 1.0
+ %val6 = fadd float %val5, 1.0
+ %val7 = fadd float %val6, 1.0
+ %val8 = fadd float %val7, 1.0
+ %val9 = fadd float %val8, 1.0
+ %val10 = fadd float %val9, 1.0
+ %val11 = fadd float %val10, 1.0
+ %val12 = fadd float %val11, 1.0
+ %val13 = fadd float %val12, 1.0
+ %val14 = fadd float %val13, 1.0
+ %val15 = fadd float %val14, 1.0
+ %val16 = fadd float %val15, 1.0
+ %val17 = fadd float %val16, 1.0
+ %val18 = fadd float %val17, 1.0
+ %val19 = fadd float %val18, 1.0
+ %val20 = fadd float %val19, 1.0
+ %val21 = fadd float %val20, 1.0
+ %val22 = fadd float %val21, 1.0
+ %val23 = fadd float %val22, 1.0
+ %val24 = fadd float %val23, 1.0
+ %val25 = fadd float %val24, 1.0
+ %val26 = fadd float %val25, 1.0
+ %val27 = fadd float %val26, 1.0
+ %val28 = fadd float %val27, 1.0
+ %val29 = fadd float %val28, 1.0
+ %val30 = fadd float %val29, 1.0
+ %val31 = fadd float %val30, 1.0
+ %val32 = fadd float %val31, 1.0
+ %val33 = fadd float %val32, 1.0
+ %val34 = fadd float %val33, 1.0
+ %val35 = fadd float %val34, 1.0
+ %val36 = fadd float %val35, 1.0
+ %val37 = fadd float %val36, 1.0
+ %val38 = fadd float %val37, 1.0
+ %val39 = fadd float %val38, 1.0
+ %val40 = fadd float %val39, 1.0
+ %val41 = fadd float %val40, 1.0
+ %val42 = fadd float %val41, 1.0
+ %val43 = fadd float %val42, 1.0
+ store float %val34, float* %tmp6, align 4
+ %exitcond = icmp ne i64 %i.0, 100
+ %tmp10 = add nsw i64 %i.0, 1
+ br i1 %exitcond, label %header, label %exit
+
+exit:
+ ret void
+}
OpenPOWER on IntegriCloud