diff options
-rw-r--r-- | polly/lib/Transform/ScheduleOptimizer.cpp | 20 | ||||
-rw-r--r-- | polly/test/ScheduleOptimizer/outer_coincidence.ll | 69 |
2 files changed, 89 insertions, 0 deletions
diff --git a/polly/lib/Transform/ScheduleOptimizer.cpp b/polly/lib/Transform/ScheduleOptimizer.cpp index 6702c53afe3..b5178796d9e 100644 --- a/polly/lib/Transform/ScheduleOptimizer.cpp +++ b/polly/lib/Transform/ScheduleOptimizer.cpp @@ -102,6 +102,12 @@ static cl::opt<std::string> cl::desc("Maximize the band depth (yes/no)"), cl::Hidden, cl::init("yes"), cl::ZeroOrMore, cl::cat(PollyCategory)); +static cl::opt<std::string> OuterCoincidence( + "polly-opt-outer-coincidence", + cl::desc("Try to construct schedules where the outer member of each band " + "satisfies the coincidence constraints (yes/no)"), + cl::Hidden, cl::init("no"), cl::ZeroOrMore, cl::cat(PollyCategory)); + static cl::opt<int> PrevectorWidth( "polly-prevect-width", cl::desc( @@ -543,6 +549,20 @@ bool IslScheduleOptimizer::runOnScop(Scop &S) { IslMaximizeBands = 1; } + int IslOuterCoincidence; + + if (OuterCoincidence == "yes") { + IslOuterCoincidence = 1; + } else if (OuterCoincidence == "no") { + IslOuterCoincidence = 0; + } else { + errs() << "warning: Option -polly-opt-outer-coincidence should either be " + "'yes' or 'no'. 
Falling back to default: 'no'\n"; + IslOuterCoincidence = 0; + } + + isl_options_set_schedule_outer_coincidence(S.getIslCtx(), + IslOuterCoincidence); isl_options_set_schedule_serialize_sccs(S.getIslCtx(), IslSerializeSCCs); isl_options_set_schedule_maximize_band_depth(S.getIslCtx(), IslMaximizeBands); isl_options_set_schedule_max_constant_term(S.getIslCtx(), MaxConstantTerm); diff --git a/polly/test/ScheduleOptimizer/outer_coincidence.ll b/polly/test/ScheduleOptimizer/outer_coincidence.ll new file mode 100644 index 00000000000..13bbe1357bd --- /dev/null +++ b/polly/test/ScheduleOptimizer/outer_coincidence.ll @@ -0,0 +1,69 @@ +; RUN: opt %loadPolly -polly-opt-isl -polly-ast -polly-tiling=0 -polly-parallel -polly-opt-outer-coincidence=no -analyze < %s | FileCheck %s +; RUN: opt %loadPolly -polly-opt-isl -polly-ast -polly-tiling=0 -polly-parallel -polly-opt-outer-coincidence=yes -analyze < %s | FileCheck %s --check-prefix=OUTER + +; By skewing, the diagonal can be made parallel. ISL does this when +; the 'outer_coincidence' option is enabled. 
+; +; void func(int m, int n, float A[static const restrict m][n]) { +; for (int i = 1; i < m; i+=1) +; for (int j = 1; j < n; j+=1) +; A[i][j] = A[i-1][j] + A[i][j-1]; +;} + +define void @func(i64 %m, i64 %n, float* noalias nonnull %A) #0 { +entry: + br label %for.cond + +for.cond: ; preds = %for.inc11, %entry + %i.0 = phi i64 [ 1, %entry ], [ %add12, %for.inc11 ] + %cmp = icmp slt i64 %i.0, %m + br i1 %cmp, label %for.cond1.preheader, label %for.end13 + +for.cond1.preheader: ; preds = %for.cond + br label %for.cond1 + +for.cond1: ; preds = %for.cond1.preheader, %for.body3 + %j.0 = phi i64 [ %add10, %for.body3 ], [ 1, %for.cond1.preheader ] + %cmp2 = icmp slt i64 %j.0, %n + br i1 %cmp2, label %for.body3, label %for.inc11 + +for.body3: ; preds = %for.cond1 + %sub = add nsw i64 %i.0, -1 + %tmp = mul nsw i64 %sub, %n + %arrayidx = getelementptr inbounds float, float* %A, i64 %tmp + %arrayidx4 = getelementptr inbounds float, float* %arrayidx, i64 %j.0 + %tmp13 = load float, float* %arrayidx4, align 4 + %sub5 = add nsw i64 %j.0, -1 + %tmp14 = mul nsw i64 %i.0, %n + %arrayidx6 = getelementptr inbounds float, float* %A, i64 %tmp14 + %arrayidx7 = getelementptr inbounds float, float* %arrayidx6, i64 %sub5 + %tmp15 = load float, float* %arrayidx7, align 4 + %add = fadd float %tmp13, %tmp15 + %tmp16 = mul nsw i64 %i.0, %n + %arrayidx8 = getelementptr inbounds float, float* %A, i64 %tmp16 + %arrayidx9 = getelementptr inbounds float, float* %arrayidx8, i64 %j.0 + store float %add, float* %arrayidx9, align 4 + %add10 = add nuw nsw i64 %j.0, 1 + br label %for.cond1 + +for.inc11: ; preds = %for.cond1 + %add12 = add nuw nsw i64 %i.0, 1 + br label %for.cond + +for.end13: ; preds = %for.cond + ret void +} + + +; CHECK: #pragma minimal dependence distance: 1 +; CHECK-NEXT: for (int c0 = 0; c0 < m - 1; c0 += 1) +; CHECK-NEXT: #pragma minimal dependence distance: 1 +; CHECK-NEXT: for (int c1 = 0; c1 < n - 1; c1 += 1) +; CHECK-NEXT: Stmt_for_body3(c0, c1); + +; OUTER: #pragma minimal 
dependence distance: 1 +; OUTER-NEXT: for (int c0 = 0; c0 < m + n - 3; c0 += 1) +; OUTER-NEXT: #pragma simd +; OUTER-NEXT: #pragma known-parallel +; OUTER-NEXT: for (int c1 = max(0, -m + c0 + 2); c1 <= min(n - 2, c0); c1 += 1) +; OUTER-NEXT: Stmt_for_body3(c0 - c1, c1); |