diff options
-rw-r--r-- | polly/include/polly/ScopDetection.h | 14 | ||||
-rw-r--r-- | polly/lib/Analysis/ScopDetection.cpp | 42 | ||||
-rw-r--r-- | polly/test/ScopDetect/profitability-large-basic-blocks.ll | 83 |
3 files changed, 135 insertions, 4 deletions
diff --git a/polly/include/polly/ScopDetection.h b/polly/include/polly/ScopDetection.h index e30826a9254..30486770eaf 100644 --- a/polly/include/polly/ScopDetection.h +++ b/polly/include/polly/ScopDetection.h @@ -287,6 +287,20 @@ private: /// @return True if all blocks in R are valid, false otherwise. bool allBlocksValid(DetectionContext &Context) const; + /// @brief Check if a region has sufficient compute instructions + /// + /// This function checks if a region has a non-trivial number of instructions + /// in each loop. This can be used as an indicator if a loop is worth + /// optimising. + /// + /// @param Context The context of scop detection. + /// @param NumLoops The number of loops in the region. + /// + /// @return True if region is has sufficient compute instructions, + /// false otherwise. + bool hasSufficientCompute(DetectionContext &Context, + int NumAffineLoops) const; + /// @brief Check if a region is profitable to optimize. /// /// Regions that are unlikely to expose interesting optimization opportunities diff --git a/polly/lib/Analysis/ScopDetection.cpp b/polly/lib/Analysis/ScopDetection.cpp index b2e5228d2c6..1083507b744 100644 --- a/polly/lib/Analysis/ScopDetection.cpp +++ b/polly/lib/Analysis/ScopDetection.cpp @@ -71,6 +71,16 @@ using namespace polly; #define DEBUG_TYPE "polly-detect" +// This option is set to a very high value, as analyzing such loops increases +// compile time on several cases. For experiments that enable this option, +// a value of around 40 has been working to avoid run-time regressions with +// Polly while still exposing interesting optimization opportunities. +static cl::opt<int> ProfitabilityMinPerLoopInstructions( + "polly-detect-profitability-min-per-loop-insts", + cl::desc("The minimal number of per-loop instructions before a single loop " + "region is considered profitable"), + cl::Hidden, cl::ValueRequired, cl::init(100000000), cl::cat(PollyCategory)); + bool polly::PollyProcessUnprofitable; static cl::opt<bool, true> XPollyProcessUnprofitable( "polly-process-unprofitable", @@ -1134,6 +1144,19 @@ bool ScopDetection::allBlocksValid(DetectionContext &Context) const { return true; } +bool ScopDetection::hasSufficientCompute(DetectionContext &Context, + int NumLoops) const { + int InstCount = 0; + + for (auto *BB : Context.CurRegion.blocks()) + if (Context.CurRegion.contains(LI->getLoopFor(BB))) + InstCount += std::distance(BB->begin(), BB->end()); + + InstCount = InstCount / NumLoops; + + return InstCount >= ProfitabilityMinPerLoopInstructions; +} + bool ScopDetection::isProfitableRegion(DetectionContext &Context) const { Region &CurRegion = Context.CurRegion; @@ -1145,13 +1168,24 @@ bool ScopDetection::isProfitableRegion(DetectionContext &Context) const { if (!Context.hasStores || !Context.hasLoads) return invalid<ReportUnprofitable>(Context, /*Assert=*/true, &CurRegion); - // Check if there are sufficent non-overapproximated loops. int NumLoops = countBeneficialLoops(&CurRegion); int NumAffineLoops = NumLoops - Context.BoxedLoopsSet.size(); - if (NumAffineLoops < 2) - return invalid<ReportUnprofitable>(Context, /*Assert=*/true, &CurRegion); - return true; + // Scops with at least two loops may allow either loop fusion or tiling and + // are consequently interesting to look at. + if (NumAffineLoops >= 2) + return true; + + // Scops that contain a loop with a non-trivial amount of computation per + // loop-iteration are interesting as we may be able to parallelize such + // loops. Individual loops that have only a small amount of computation + // per-iteration are performance-wise very fragile as any change to the + // loop induction variables may affect performance. To not cause spurious + // performance regressions, we do not consider such loops. + if (NumAffineLoops == 1 && hasSufficientCompute(Context, NumLoops)) + return true; + + return invalid<ReportUnprofitable>(Context, /*Assert=*/true, &CurRegion); } bool ScopDetection::isValidRegion(DetectionContext &Context) const { diff --git a/polly/test/ScopDetect/profitability-large-basic-blocks.ll b/polly/test/ScopDetect/profitability-large-basic-blocks.ll new file mode 100644 index 00000000000..268731e294d --- /dev/null +++ b/polly/test/ScopDetect/profitability-large-basic-blocks.ll @@ -0,0 +1,83 @@ +; RUN: opt %loadPolly -polly-process-unprofitable=false \ +; RUN: -polly-detect-profitability-min-per-loop-insts=40 \ +; RUN: -polly-detect -analyze < %s | FileCheck %s -check-prefix=PROFITABLE + +; RUN: opt %loadPolly -polly-process-unprofitable=true \ +; RUN: -polly-detect -analyze < %s | FileCheck %s -check-prefix=PROFITABLE + +; RUN: opt %loadPolly -polly-process-unprofitable=false \ +; RUN: \ +; RUN: -polly-detect -analyze < %s | FileCheck %s -check-prefix=UNPROFITABLE + +; UNPROFITABLE-NOT: Valid Region for Scop: +; PROFITABLE: Valid Region for Scop: + +; void foo(float *A, float *B, long N) { +; for (long i = 0; i < 100; i++) +; A[i] += .... / * a lot of compute */ +; } +; +target datalayout = "e-m:e-i64:64-f80:128-n8:16:32:64-S128" + +define void @foo(float* %A, float* %B, i64 %N) { +entry: + br label %header + +header: + %i.0 = phi i64 [ 0, %entry ], [ %tmp10, %header ] + %tmp5 = sitofp i64 %i.0 to float + %tmp6 = getelementptr inbounds float, float* %A, i64 %i.0 + %tmp7 = load float, float* %tmp6, align 4 + %tmp8 = fadd float %tmp7, %tmp5 + %val0 = fadd float %tmp7, 1.0 + %val1 = fadd float %val0, 1.0 + %val2 = fadd float %val1, 1.0 + %val3 = fadd float %val2, 1.0 + %val4 = fadd float %val3, 1.0 + %val5 = fadd float %val4, 1.0 + %val6 = fadd float %val5, 1.0 + %val7 = fadd float %val6, 1.0 + %val8 = fadd float %val7, 1.0 + %val9 = fadd float %val8, 1.0 + %val10 = fadd float %val9, 1.0 + %val11 = fadd float %val10, 1.0 + %val12 = fadd float %val11, 1.0 + %val13 = fadd float %val12, 1.0 + %val14 = fadd float %val13, 1.0 + %val15 = fadd float %val14, 1.0 + %val16 = fadd float %val15, 1.0 + %val17 = fadd float %val16, 1.0 + %val18 = fadd float %val17, 1.0 + %val19 = fadd float %val18, 1.0 + %val20 = fadd float %val19, 1.0 + %val21 = fadd float %val20, 1.0 + %val22 = fadd float %val21, 1.0 + %val23 = fadd float %val22, 1.0 + %val24 = fadd float %val23, 1.0 + %val25 = fadd float %val24, 1.0 + %val26 = fadd float %val25, 1.0 + %val27 = fadd float %val26, 1.0 + %val28 = fadd float %val27, 1.0 + %val29 = fadd float %val28, 1.0 + %val30 = fadd float %val29, 1.0 + %val31 = fadd float %val30, 1.0 + %val32 = fadd float %val31, 1.0 + %val33 = fadd float %val32, 1.0 + %val34 = fadd float %val33, 1.0 + %val35 = fadd float %val34, 1.0 + %val36 = fadd float %val35, 1.0 + %val37 = fadd float %val36, 1.0 + %val38 = fadd float %val37, 1.0 + %val39 = fadd float %val38, 1.0 + %val40 = fadd float %val39, 1.0 + %val41 = fadd float %val40, 1.0 + %val42 = fadd float %val41, 1.0 + %val43 = fadd float %val42, 1.0 + store float %val34, float* %tmp6, align 4 + %exitcond = icmp ne i64 %i.0, 100 + %tmp10 = add nsw i64 %i.0, 1 + br i1 %exitcond, label %header, label %exit + +exit: + ret void +} |