author     Tom Stellard <tstellar@redhat.com>    2018-11-16 04:24:44 +0000
committer  Tom Stellard <tstellar@redhat.com>    2018-11-16 04:24:44 +0000
commit     77480c54e9028b5aa3ea76b752c4eccaa4809773 (patch)
tree       cbd8a1076b1303f065de5703376558fa5e47bd02
parent     24d841e026a158230561dca5f0d629078d0d965d (diff)
download   bcm5719-llvm-77480c54e9028b5aa3ea76b752c4eccaa4809773.tar.gz
           bcm5719-llvm-77480c54e9028b5aa3ea76b752c4eccaa4809773.zip
Merging r343212:
------------------------------------------------------------------------
r343212 | meinersbur | 2018-09-27 06:39:37 -0700 (Thu, 27 Sep 2018) | 37 lines

[IslAst] Fix InParallelFor nesting.

IslAst could mark two nested outer loops as "OutermostParallel". This caused the code generator to try to OpenMP-parallelize both loops, which it is not prepared to do.

The reason is that the recursive AST build algorithm managed a flag "InParallelFor" to ensure that no nested loop is also marked as "OutermostParallel". Unfortunately, the same flag was used by nodes marked as SIMD and reset to false after the SIMD node. Since loops can be marked as SIMD inside "OutermostParallel" loops, the recursive algorithm again tried to mark loops as "OutermostParallel" although they were still nested inside another "OutermostParallel" loop.

The fix exposed another bug: the function "astScheduleDimIsParallel" was only called when a loop was potentially "OutermostParallel" or "InnermostParallel", but as a side effect it also determines the minimal dependence distance. Hence, changing when we need to know whether a loop is "OutermostParallel" also changed which loops were annotated with "#pragma minimal dependence distance".

Moreover, a complex condition linked to "InParallelFor" determined whether a loop should be an "InnermostParallel" loop. It missed some cases in which a loop inside an SIMD mark node was not marked as such and therefore was not annotated with "#pragma simd".

The changes in particular:
1. Split the "InParallelFor" flag into an "InParallelFor" and an "InSIMD" flag.
2. Unconditionally call "astScheduleDimIsParallel" for its side effects and store the result in "IsParallel" for later use.
3. Simplify the condition for when a loop is "InnermostParallel".

Fixes llvm.org/PR33153 and llvm.org/PR38073.
------------------------------------------------------------------------

llvm-svn: 347024
-rw-r--r--polly/include/polly/CodeGen/IslAst.h6
-rw-r--r--polly/lib/CodeGen/IslAst.cpp38
-rw-r--r--polly/test/ScheduleOptimizer/SIMDInParallelFor.ll65
-rw-r--r--polly/test/ScheduleOptimizer/full_partial_tile_separation.ll4
4 files changed, 92 insertions, 21 deletions
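Before the patch itself, here is a minimal, self-contained C++ sketch of the flag handling described above. It is not the actual Polly/isl code: BuildState, Payload, beforeFor, afterFor, beforeMark, and afterMark are simplified, hypothetical stand-ins for AstBuildUserInfo, IslAstUserPayload, and the astBuild* callbacks in IslAst.cpp, and the dependence test is replaced by a plain boolean parameter.

// Minimal model of the post-patch flag handling (hypothetical names, not the
// real Polly/isl API).
#include <cassert>
#include <string>

// Per-loop annotations, analogous to IslAstUserPayload.
struct Payload {
  bool IsParallel = false;          // cached result of the dependence test
  bool IsInnermost = false;
  bool IsOutermostParallel = false; // candidate for "#pragma omp parallel for"
  bool IsInnermostParallel = false; // candidate for "#pragma simd"
};

// Build-wide state, analogous to AstBuildUserInfo.
struct BuildState {
  bool InParallelFor = false; // inside an "OutermostParallel" loop
  bool InSIMD = false;        // inside a "SIMD" mark node
};

// Entering a for node (mirrors astBuildBeforeFor): the dependence test now
// runs unconditionally and its result is cached in the payload.
void beforeFor(BuildState &S, Payload &P, bool DimIsParallel) {
  P.IsParallel = DimIsParallel;
  if (!S.InParallelFor && !S.InSIMD)
    S.InParallelFor = P.IsOutermostParallel = P.IsParallel;
}

// Leaving a for node (mirrors astBuildAfterFor): the "InnermostParallel"
// decision is now a single condition on the cached flags.
void afterFor(BuildState &S, Payload &P, bool IsInnermost) {
  P.IsInnermost = IsInnermost;
  P.IsInnermostParallel = P.IsInnermost && (S.InSIMD || P.IsParallel);
  if (P.IsOutermostParallel)
    S.InParallelFor = false;
}

// Mark nodes only toggle InSIMD; InParallelFor is left untouched, which is
// the core of the fix.
void beforeMark(BuildState &S, const std::string &Name) {
  if (Name == "SIMD")
    S.InSIMD = true;
}

void afterMark(BuildState &S, const std::string &Name) {
  if (Name == "SIMD")
    S.InSIMD = false;
}

int main() {
  // Walk a tree of the shape: parallel outer for -> SIMD mark -> inner for.
  BuildState S;
  Payload Outer, Inner;
  beforeFor(S, Outer, /*DimIsParallel=*/true);
  beforeMark(S, "SIMD");
  beforeFor(S, Inner, /*DimIsParallel=*/true);
  afterFor(S, Inner, /*IsInnermost=*/true);
  afterMark(S, "SIMD");
  afterFor(S, Outer, /*IsInnermost=*/false);

  // Only the outer loop is "OutermostParallel"; the inner loop stays nested
  // even though the SIMD mark was closed in between (the pre-patch bug).
  assert(Outer.IsOutermostParallel && !Inner.IsOutermostParallel);
  // The inner loop is still recognized as "InnermostParallel" for "#pragma simd".
  assert(Inner.IsInnermostParallel);
  return 0;
}

With asserts enabled, the example reproduces the intended post-patch behaviour: closing a SIMD mark no longer clears the parallel-for state, so a loop nested inside an "OutermostParallel" loop is never marked "OutermostParallel" again, while it can still become "InnermostParallel" inside a SIMD mark.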
diff --git a/polly/include/polly/CodeGen/IslAst.h b/polly/include/polly/CodeGen/IslAst.h
index 48ae3ffdca0..059205c629e 100644
--- a/polly/include/polly/CodeGen/IslAst.h
+++ b/polly/include/polly/CodeGen/IslAst.h
@@ -103,6 +103,10 @@ public:
/// Cleanup all isl structs on destruction.
~IslAstUserPayload();
+ /// Does the dependence analysis determine that there are no loop-carried
+ /// dependencies?
+ bool IsParallel = false;
+
/// Flag to mark innermost loops.
bool IsInnermost = false;
@@ -116,7 +120,7 @@ public:
bool IsReductionParallel = false;
/// The minimal dependence distance for non parallel loops.
- isl_pw_aff *MinimalDependenceDistance = nullptr;
+ isl::pw_aff MinimalDependenceDistance;
/// The build environment at the time this node was constructed.
isl_ast_build *Build = nullptr;
diff --git a/polly/lib/CodeGen/IslAst.cpp b/polly/lib/CodeGen/IslAst.cpp
index 0923726c87b..1862950ed67 100644
--- a/polly/lib/CodeGen/IslAst.cpp
+++ b/polly/lib/CodeGen/IslAst.cpp
@@ -119,6 +119,9 @@ struct AstBuildUserInfo {
/// Flag to indicate that we are inside a parallel for node.
bool InParallelFor = false;
+ /// Flag to indicate that we are inside an SIMD node.
+ bool InSIMD = false;
+
/// The last iterator id created for the current SCoP.
isl_id *LastForNodeId = nullptr;
};
@@ -131,7 +134,6 @@ static void freeIslAstUserPayload(void *Ptr) {
IslAstInfo::IslAstUserPayload::~IslAstUserPayload() {
isl_ast_build_free(Build);
- isl_pw_aff_free(MinimalDependenceDistance);
}
/// Print a string @p str in a single line using @p Printer.
@@ -226,7 +228,10 @@ static bool astScheduleDimIsParallel(__isl_keep isl_ast_build *Build,
D->getDependences(Dependences::TYPE_RAW | Dependences::TYPE_WAW |
Dependences::TYPE_WAR | Dependences::TYPE_TC_RED)
.release();
- D->isParallel(Schedule, DepsAll, &NodeInfo->MinimalDependenceDistance);
+ isl_pw_aff *MinimalDependenceDistance = nullptr;
+ D->isParallel(Schedule, DepsAll, &MinimalDependenceDistance);
+ NodeInfo->MinimalDependenceDistance =
+ isl::manage(MinimalDependenceDistance);
isl_union_map_free(Schedule);
return false;
}
@@ -268,10 +273,13 @@ static __isl_give isl_id *astBuildBeforeFor(__isl_keep isl_ast_build *Build,
Id = isl_id_set_free_user(Id, freeIslAstUserPayload);
BuildInfo->LastForNodeId = Id;
+ Payload->IsParallel =
+ astScheduleDimIsParallel(Build, BuildInfo->Deps, Payload);
+
// Test for parallelism only if we are not already inside a parallel loop
- if (!BuildInfo->InParallelFor)
+ if (!BuildInfo->InParallelFor && !BuildInfo->InSIMD)
BuildInfo->InParallelFor = Payload->IsOutermostParallel =
- astScheduleDimIsParallel(Build, BuildInfo->Deps, Payload);
+ Payload->IsParallel;
return Id;
}
@@ -296,18 +304,8 @@ astBuildAfterFor(__isl_take isl_ast_node *Node, __isl_keep isl_ast_build *Build,
Payload->Build = isl_ast_build_copy(Build);
Payload->IsInnermost = (Id == BuildInfo->LastForNodeId);
- // Innermost loops that are surrounded by parallel loops have not yet been
- // tested for parallelism. Test them here to ensure we check all innermost
- // loops for parallelism.
- if (Payload->IsInnermost && BuildInfo->InParallelFor) {
- if (Payload->IsOutermostParallel) {
- Payload->IsInnermostParallel = true;
- } else {
- if (PollyVectorizerChoice == VECTORIZER_NONE)
- Payload->IsInnermostParallel =
- astScheduleDimIsParallel(Build, BuildInfo->Deps, Payload);
- }
- }
+ Payload->IsInnermostParallel =
+ Payload->IsInnermost && (BuildInfo->InSIMD || Payload->IsParallel);
if (Payload->IsOutermostParallel)
BuildInfo->InParallelFor = false;
@@ -323,7 +321,7 @@ static isl_stat astBuildBeforeMark(__isl_keep isl_id *MarkId,
AstBuildUserInfo *BuildInfo = (AstBuildUserInfo *)User;
if (strcmp(isl_id_get_name(MarkId), "SIMD") == 0)
- BuildInfo->InParallelFor = true;
+ BuildInfo->InSIMD = true;
return isl_stat_ok;
}
@@ -335,7 +333,7 @@ astBuildAfterMark(__isl_take isl_ast_node *Node,
AstBuildUserInfo *BuildInfo = (AstBuildUserInfo *)User;
auto *Id = isl_ast_node_mark_get_id(Node);
if (strcmp(isl_id_get_name(Id), "SIMD") == 0)
- BuildInfo->InParallelFor = false;
+ BuildInfo->InSIMD = false;
isl_id_free(Id);
return Node;
}
@@ -565,6 +563,7 @@ void IslAst::init(const Dependences &D) {
if (PerformParallelTest) {
BuildInfo.Deps = &D;
BuildInfo.InParallelFor = false;
+ BuildInfo.InSIMD = false;
Build = isl_ast_build_set_before_each_for(Build, &astBuildBeforeFor,
&BuildInfo);
@@ -664,8 +663,7 @@ IslAstInfo::getSchedule(__isl_keep isl_ast_node *Node) {
__isl_give isl_pw_aff *
IslAstInfo::getMinimalDependenceDistance(__isl_keep isl_ast_node *Node) {
IslAstUserPayload *Payload = getNodePayload(Node);
- return Payload ? isl_pw_aff_copy(Payload->MinimalDependenceDistance)
- : nullptr;
+ return Payload ? Payload->MinimalDependenceDistance.copy() : nullptr;
}
IslAstInfo::MemoryAccessSet *
diff --git a/polly/test/ScheduleOptimizer/SIMDInParallelFor.ll b/polly/test/ScheduleOptimizer/SIMDInParallelFor.ll
new file mode 100644
index 00000000000..a5d65c81caf
--- /dev/null
+++ b/polly/test/ScheduleOptimizer/SIMDInParallelFor.ll
@@ -0,0 +1,65 @@
+; RUN: opt %loadPolly -polly-parallel -polly-vectorizer=stripmine -polly-codegen-verify -polly-opt-isl -polly-ast -polly-codegen -analyze < %s | FileCheck %s
+;
+; Check that there are no nested #pragma omp parallel for inside a
+; #pragma omp parallel for loop.
+; See llvm.org/PR38073 and llvm.org/PR33153
+;
+; This test unfortunately is very dependent on the result of the schedule
+; optimizer (-polly-opt-isl).
+;
+target datalayout = "e-m:e-i64:64-f80:128-n8:16:32:64-S128"
+
+@b = external dso_local unnamed_addr global [1984 x [1984 x double]], align 16
+@c = external dso_local unnamed_addr global [1984 x [1984 x double]], align 16
+
+define dso_local void @main() local_unnamed_addr {
+entry:
+ %cond = select i1 undef, i32 undef, i32 1984
+ %tmp = zext i32 %cond to i64
+ %cond63 = select i1 undef, i32 undef, i32 1984
+ %tmp1 = zext i32 %cond63 to i64
+ br label %for.cond51.preheader
+
+for.cond51.preheader:
+ %indvars.iv213 = phi i64 [ 0, %entry ], [ %indvars.iv.next214, %for.inc98 ]
+ %cond73 = select i1 undef, i32 undef, i32 1984
+ %tmp2 = zext i32 %cond73 to i64
+ br label %for.cond56.preheader
+
+for.cond56.preheader:
+ %indvars.iv223 = phi i64 [ 0, %for.cond51.preheader ], [ %indvars.iv.next224, %for.inc95 ]
+ br label %for.cond66.preheader
+
+for.cond66.preheader:
+ %indvars.iv219 = phi i64 [ %indvars.iv.next220, %for.inc92 ], [ 0, %for.cond56.preheader ]
+ br label %for.body75
+
+for.body75:
+ %indvars.iv215 = phi i64 [ %indvars.iv213, %for.cond66.preheader ], [ %indvars.iv.next216, %for.body75 ]
+ %arrayidx83 = getelementptr inbounds [1984 x [1984 x double]], [1984 x [1984 x double]]* @b, i64 0, i64 %indvars.iv219, i64 %indvars.iv215
+ %tmp3 = load double, double* %arrayidx83, align 8
+ %arrayidx87 = getelementptr inbounds [1984 x [1984 x double]], [1984 x [1984 x double]]* @c, i64 0, i64 %indvars.iv223, i64 %indvars.iv215
+ store double undef, double* %arrayidx87, align 8
+ %indvars.iv.next216 = add nuw nsw i64 %indvars.iv215, 1
+ %cmp74 = icmp ult i64 %indvars.iv.next216, %tmp2
+ br i1 %cmp74, label %for.body75, label %for.inc92
+
+for.inc92:
+ %indvars.iv.next220 = add nuw nsw i64 %indvars.iv219, 1
+ %cmp64 = icmp ult i64 %indvars.iv.next220, %tmp1
+ br i1 %cmp64, label %for.cond66.preheader, label %for.inc95
+
+for.inc95:
+ %indvars.iv.next224 = add nuw nsw i64 %indvars.iv223, 1
+ %cmp54 = icmp ult i64 %indvars.iv.next224, %tmp
+ br i1 %cmp54, label %for.cond56.preheader, label %for.inc98
+
+for.inc98:
+ %indvars.iv.next214 = add nuw nsw i64 %indvars.iv213, 48
+ br label %for.cond51.preheader
+}
+
+; No parallel loops except the two outermost.
+; CHECK: #pragma omp parallel for
+; CHECK: #pragma omp parallel for
+; CHECK-NOT: #pragma omp parallel for
diff --git a/polly/test/ScheduleOptimizer/full_partial_tile_separation.ll b/polly/test/ScheduleOptimizer/full_partial_tile_separation.ll
index ede09e7c3fb..4a0c3315292 100644
--- a/polly/test/ScheduleOptimizer/full_partial_tile_separation.ll
+++ b/polly/test/ScheduleOptimizer/full_partial_tile_separation.ll
@@ -5,12 +5,15 @@
; CHECK-NEXT: #pragma known-parallel
; CHECK-NEXT: for (int c0 = 0; c0 <= floord(ni - 1, 32); c0 += 1)
; CHECK-NEXT: for (int c1 = 0; c1 <= floord(nj - 1, 32); c1 += 1)
+; CHECK-NEXT: #pragma minimal dependence distance: 1
; CHECK-NEXT: for (int c2 = 0; c2 <= floord(nk - 1, 32); c2 += 1) {
; CHECK-NEXT: // 1st level tiling - Points
; CHECK-NEXT: for (int c3 = 0; c3 <= min(31, ni - 32 * c0 - 1); c3 += 1) {
; CHECK-NEXT: for (int c4 = 0; c4 <= min(7, -8 * c1 + nj / 4 - 1); c4 += 1)
+; CHECK-NEXT: #pragma minimal dependence distance: 1
; CHECK-NEXT: for (int c5 = 0; c5 <= min(31, nk - 32 * c2 - 1); c5 += 1) {
; CHECK-NEXT: // SIMD
+; CHECK-NEXT: #pragma simd
; CHECK-NEXT: for (int c6 = 0; c6 <= 3; c6 += 1)
; CHECK-NEXT: Stmt_for_body_6(32 * c0 + c3, 32 * c1 + 4 * c4 + c6, 32 * c2 + c5);
; CHECK-NEXT: }
@@ -18,6 +21,7 @@
; CHECK-NEXT: #pragma minimal dependence distance: 1
; CHECK-NEXT: for (int c5 = 0; c5 <= min(31, nk - 32 * c2 - 1); c5 += 1) {
; CHECK-NEXT: // SIMD
+; CHECK-NEXT: #pragma simd
; CHECK-NEXT: for (int c6 = 0; c6 < nj % 4; c6 += 1)
; CHECK-NEXT: Stmt_for_body_6(32 * c0 + c3, -(nj % 4) + nj + c6, 32 * c2 + c5);
; CHECK-NEXT: }