summaryrefslogtreecommitdiffstats
diff options
context:
space:
mode:
-rw-r--r--polly/include/polly/CodeGen/IslAst.h6
-rw-r--r--polly/lib/CodeGen/IslAst.cpp38
-rw-r--r--polly/test/ScheduleOptimizer/SIMDInParallelFor.ll65
-rw-r--r--polly/test/ScheduleOptimizer/full_partial_tile_separation.ll4
4 files changed, 92 insertions, 21 deletions
diff --git a/polly/include/polly/CodeGen/IslAst.h b/polly/include/polly/CodeGen/IslAst.h
index 48ae3ffdca0..059205c629e 100644
--- a/polly/include/polly/CodeGen/IslAst.h
+++ b/polly/include/polly/CodeGen/IslAst.h
@@ -103,6 +103,10 @@ public:
/// Cleanup all isl structs on destruction.
~IslAstUserPayload();
+ /// Does the dependence analysis determine that there are no loop-carried
+ /// dependencies?
+ bool IsParallel = false;
+
/// Flag to mark innermost loops.
bool IsInnermost = false;
@@ -116,7 +120,7 @@ public:
bool IsReductionParallel = false;
/// The minimal dependence distance for non parallel loops.
- isl_pw_aff *MinimalDependenceDistance = nullptr;
+ isl::pw_aff MinimalDependenceDistance;
/// The build environment at the time this node was constructed.
isl_ast_build *Build = nullptr;
diff --git a/polly/lib/CodeGen/IslAst.cpp b/polly/lib/CodeGen/IslAst.cpp
index 0923726c87b..1862950ed67 100644
--- a/polly/lib/CodeGen/IslAst.cpp
+++ b/polly/lib/CodeGen/IslAst.cpp
@@ -119,6 +119,9 @@ struct AstBuildUserInfo {
/// Flag to indicate that we are inside a parallel for node.
bool InParallelFor = false;
+ /// Flag to indicate that we are inside an SIMD node.
+ bool InSIMD = false;
+
/// The last iterator id created for the current SCoP.
isl_id *LastForNodeId = nullptr;
};
@@ -131,7 +134,6 @@ static void freeIslAstUserPayload(void *Ptr) {
IslAstInfo::IslAstUserPayload::~IslAstUserPayload() {
isl_ast_build_free(Build);
- isl_pw_aff_free(MinimalDependenceDistance);
}
/// Print a string @p str in a single line using @p Printer.
@@ -226,7 +228,10 @@ static bool astScheduleDimIsParallel(__isl_keep isl_ast_build *Build,
D->getDependences(Dependences::TYPE_RAW | Dependences::TYPE_WAW |
Dependences::TYPE_WAR | Dependences::TYPE_TC_RED)
.release();
- D->isParallel(Schedule, DepsAll, &NodeInfo->MinimalDependenceDistance);
+ isl_pw_aff *MinimalDependenceDistance = nullptr;
+ D->isParallel(Schedule, DepsAll, &MinimalDependenceDistance);
+ NodeInfo->MinimalDependenceDistance =
+ isl::manage(MinimalDependenceDistance);
isl_union_map_free(Schedule);
return false;
}
@@ -268,10 +273,13 @@ static __isl_give isl_id *astBuildBeforeFor(__isl_keep isl_ast_build *Build,
Id = isl_id_set_free_user(Id, freeIslAstUserPayload);
BuildInfo->LastForNodeId = Id;
+ Payload->IsParallel =
+ astScheduleDimIsParallel(Build, BuildInfo->Deps, Payload);
+
// Test for parallelism only if we are not already inside a parallel loop
- if (!BuildInfo->InParallelFor)
+ if (!BuildInfo->InParallelFor && !BuildInfo->InSIMD)
BuildInfo->InParallelFor = Payload->IsOutermostParallel =
- astScheduleDimIsParallel(Build, BuildInfo->Deps, Payload);
+ Payload->IsParallel;
return Id;
}
@@ -296,18 +304,8 @@ astBuildAfterFor(__isl_take isl_ast_node *Node, __isl_keep isl_ast_build *Build,
Payload->Build = isl_ast_build_copy(Build);
Payload->IsInnermost = (Id == BuildInfo->LastForNodeId);
- // Innermost loops that are surrounded by parallel loops have not yet been
- // tested for parallelism. Test them here to ensure we check all innermost
- // loops for parallelism.
- if (Payload->IsInnermost && BuildInfo->InParallelFor) {
- if (Payload->IsOutermostParallel) {
- Payload->IsInnermostParallel = true;
- } else {
- if (PollyVectorizerChoice == VECTORIZER_NONE)
- Payload->IsInnermostParallel =
- astScheduleDimIsParallel(Build, BuildInfo->Deps, Payload);
- }
- }
+ Payload->IsInnermostParallel =
+ Payload->IsInnermost && (BuildInfo->InSIMD || Payload->IsParallel);
if (Payload->IsOutermostParallel)
BuildInfo->InParallelFor = false;
@@ -323,7 +321,7 @@ static isl_stat astBuildBeforeMark(__isl_keep isl_id *MarkId,
AstBuildUserInfo *BuildInfo = (AstBuildUserInfo *)User;
if (strcmp(isl_id_get_name(MarkId), "SIMD") == 0)
- BuildInfo->InParallelFor = true;
+ BuildInfo->InSIMD = true;
return isl_stat_ok;
}
@@ -335,7 +333,7 @@ astBuildAfterMark(__isl_take isl_ast_node *Node,
AstBuildUserInfo *BuildInfo = (AstBuildUserInfo *)User;
auto *Id = isl_ast_node_mark_get_id(Node);
if (strcmp(isl_id_get_name(Id), "SIMD") == 0)
- BuildInfo->InParallelFor = false;
+ BuildInfo->InSIMD = false;
isl_id_free(Id);
return Node;
}
@@ -565,6 +563,7 @@ void IslAst::init(const Dependences &D) {
if (PerformParallelTest) {
BuildInfo.Deps = &D;
BuildInfo.InParallelFor = false;
+ BuildInfo.InSIMD = false;
Build = isl_ast_build_set_before_each_for(Build, &astBuildBeforeFor,
&BuildInfo);
@@ -664,8 +663,7 @@ IslAstInfo::getSchedule(__isl_keep isl_ast_node *Node) {
__isl_give isl_pw_aff *
IslAstInfo::getMinimalDependenceDistance(__isl_keep isl_ast_node *Node) {
IslAstUserPayload *Payload = getNodePayload(Node);
- return Payload ? isl_pw_aff_copy(Payload->MinimalDependenceDistance)
- : nullptr;
+ return Payload ? Payload->MinimalDependenceDistance.copy() : nullptr;
}
IslAstInfo::MemoryAccessSet *
diff --git a/polly/test/ScheduleOptimizer/SIMDInParallelFor.ll b/polly/test/ScheduleOptimizer/SIMDInParallelFor.ll
new file mode 100644
index 00000000000..a5d65c81caf
--- /dev/null
+++ b/polly/test/ScheduleOptimizer/SIMDInParallelFor.ll
@@ -0,0 +1,65 @@
+; RUN: opt %loadPolly -polly-parallel -polly-vectorizer=stripmine -polly-codegen-verify -polly-opt-isl -polly-ast -polly-codegen -analyze < %s | FileCheck %s
+;
+; Check that there are no nested #pragma omp parallel for inside a
+; #pragma omp parallel for loop.
+; See llvm.org/PR38073 and llvm.org/PR33153
+;
+; This test unfortunately is very dependent on the result of the schedule
+; optimizer (-polly-opt-isl).
+;
+target datalayout = "e-m:e-i64:64-f80:128-n8:16:32:64-S128"
+
+@b = external dso_local unnamed_addr global [1984 x [1984 x double]], align 16
+@c = external dso_local unnamed_addr global [1984 x [1984 x double]], align 16
+
+define dso_local void @main() local_unnamed_addr {
+entry:
+ %cond = select i1 undef, i32 undef, i32 1984
+ %tmp = zext i32 %cond to i64
+ %cond63 = select i1 undef, i32 undef, i32 1984
+ %tmp1 = zext i32 %cond63 to i64
+ br label %for.cond51.preheader
+
+for.cond51.preheader:
+ %indvars.iv213 = phi i64 [ 0, %entry ], [ %indvars.iv.next214, %for.inc98 ]
+ %cond73 = select i1 undef, i32 undef, i32 1984
+ %tmp2 = zext i32 %cond73 to i64
+ br label %for.cond56.preheader
+
+for.cond56.preheader:
+ %indvars.iv223 = phi i64 [ 0, %for.cond51.preheader ], [ %indvars.iv.next224, %for.inc95 ]
+ br label %for.cond66.preheader
+
+for.cond66.preheader:
+ %indvars.iv219 = phi i64 [ %indvars.iv.next220, %for.inc92 ], [ 0, %for.cond56.preheader ]
+ br label %for.body75
+
+for.body75:
+ %indvars.iv215 = phi i64 [ %indvars.iv213, %for.cond66.preheader ], [ %indvars.iv.next216, %for.body75 ]
+ %arrayidx83 = getelementptr inbounds [1984 x [1984 x double]], [1984 x [1984 x double]]* @b, i64 0, i64 %indvars.iv219, i64 %indvars.iv215
+ %tmp3 = load double, double* %arrayidx83, align 8
+ %arrayidx87 = getelementptr inbounds [1984 x [1984 x double]], [1984 x [1984 x double]]* @c, i64 0, i64 %indvars.iv223, i64 %indvars.iv215
+ store double undef, double* %arrayidx87, align 8
+ %indvars.iv.next216 = add nuw nsw i64 %indvars.iv215, 1
+ %cmp74 = icmp ult i64 %indvars.iv.next216, %tmp2
+ br i1 %cmp74, label %for.body75, label %for.inc92
+
+for.inc92:
+ %indvars.iv.next220 = add nuw nsw i64 %indvars.iv219, 1
+ %cmp64 = icmp ult i64 %indvars.iv.next220, %tmp1
+ br i1 %cmp64, label %for.cond66.preheader, label %for.inc95
+
+for.inc95:
+ %indvars.iv.next224 = add nuw nsw i64 %indvars.iv223, 1
+ %cmp54 = icmp ult i64 %indvars.iv.next224, %tmp
+ br i1 %cmp54, label %for.cond56.preheader, label %for.inc98
+
+for.inc98:
+ %indvars.iv.next214 = add nuw nsw i64 %indvars.iv213, 48
+ br label %for.cond51.preheader
+}
+
+; No parallel loop except the to outermost.
+; CHECK: #pragma omp parallel for
+; CHECK: #pragma omp parallel for
+; CHECK-NOT: #pragma omp parallel for
diff --git a/polly/test/ScheduleOptimizer/full_partial_tile_separation.ll b/polly/test/ScheduleOptimizer/full_partial_tile_separation.ll
index ede09e7c3fb..4a0c3315292 100644
--- a/polly/test/ScheduleOptimizer/full_partial_tile_separation.ll
+++ b/polly/test/ScheduleOptimizer/full_partial_tile_separation.ll
@@ -5,12 +5,15 @@
; CHECK-NEXT: #pragma known-parallel
; CHECK-NEXT: for (int c0 = 0; c0 <= floord(ni - 1, 32); c0 += 1)
; CHECK-NEXT: for (int c1 = 0; c1 <= floord(nj - 1, 32); c1 += 1)
+; CHECK-NEXT: #pragma minimal dependence distance: 1
; CHECK-NEXT: for (int c2 = 0; c2 <= floord(nk - 1, 32); c2 += 1) {
; CHECK-NEXT: // 1st level tiling - Points
; CHECK-NEXT: for (int c3 = 0; c3 <= min(31, ni - 32 * c0 - 1); c3 += 1) {
; CHECK-NEXT: for (int c4 = 0; c4 <= min(7, -8 * c1 + nj / 4 - 1); c4 += 1)
+; CHECK-NEXT: #pragma minimal dependence distance: 1
; CHECK-NEXT: for (int c5 = 0; c5 <= min(31, nk - 32 * c2 - 1); c5 += 1) {
; CHECK-NEXT: // SIMD
+; CHECK-NEXT: #pragma simd
; CHECK-NEXT: for (int c6 = 0; c6 <= 3; c6 += 1)
; CHECK-NEXT: Stmt_for_body_6(32 * c0 + c3, 32 * c1 + 4 * c4 + c6, 32 * c2 + c5);
; CHECK-NEXT: }
@@ -18,6 +21,7 @@
; CHECK-NEXT: #pragma minimal dependence distance: 1
; CHECK-NEXT: for (int c5 = 0; c5 <= min(31, nk - 32 * c2 - 1); c5 += 1) {
; CHECK-NEXT: // SIMD
+; CHECK-NEXT: #pragma simd
; CHECK-NEXT: for (int c6 = 0; c6 < nj % 4; c6 += 1)
; CHECK-NEXT: Stmt_for_body_6(32 * c0 + c3, -(nj % 4) + nj + c6, 32 * c2 + c5);
; CHECK-NEXT: }
OpenPOWER on IntegriCloud