diff options
36 files changed, 1528 insertions, 36 deletions
diff --git a/polly/include/polly/CodeGen/IslAst.h b/polly/include/polly/CodeGen/IslAst.h index 1d93298a94c..495b7eb88eb 100644 --- a/polly/include/polly/CodeGen/IslAst.h +++ b/polly/include/polly/CodeGen/IslAst.h @@ -49,6 +49,9 @@ struct IslAstUser { // The node is the innermost parallel loop. int IsInnermostParallel; + + // The node is only parallel because of reductions + bool IsReductionParallel; }; class IslAstInfo : public ScopPass { @@ -87,7 +90,7 @@ static inline bool isInnermostParallel(__isl_keep isl_ast_node *Node) { bool Res = false; if (Info) - Res = Info->IsInnermostParallel; + Res = Info->IsInnermostParallel && !Info->IsReductionParallel; isl_id_free(Id); return Res; } @@ -101,7 +104,7 @@ static inline bool isOutermostParallel(__isl_keep isl_ast_node *Node) { bool Res = false; if (Info) - Res = Info->IsOutermostParallel; + Res = Info->IsOutermostParallel && !Info->IsReductionParallel; isl_id_free(Id); return Res; } diff --git a/polly/include/polly/Dependences.h b/polly/include/polly/Dependences.h index 5b09ab969bf..b305660a8a5 100755 --- a/polly/include/polly/Dependences.h +++ b/polly/include/polly/Dependences.h @@ -47,23 +47,30 @@ public: /// @brief The type of the dependences. /// - /// Reduction dependences are seperated because they can be ignored during - /// the scheduling. This is the case since the order in which the reduction - /// statements are executed does not matter. However, if they are executed - /// in parallel we need to take additional measures (e.g., privatization) - /// to ensure a correct result. + /// Reduction dependences are separated from RAW/WAW/WAR dependences because + /// we can ignore them during the scheduling. This is the case since the order + /// in which the reduction statements are executed does not matter. However, + /// if they are executed in parallel we need to take additional measures + /// (e.g., privatization) to ensure a correct result. 
The (reverse) transitive + /// closure of the reduction dependences is used to check for parallel + /// executed reduction statements during code generation. These dependences + /// connect all instances of a reduction with each other, they are therefore + /// cyclic and possibly "reversed". enum Type { // Write after read - TYPE_WAR = 0x1, + TYPE_WAR = 1 << 0, // Read after write - TYPE_RAW = 0x2, + TYPE_RAW = 1 << 1, // Write after write - TYPE_WAW = 0x4, + TYPE_WAW = 1 << 2, // Reduction dependences - TYPE_RED = 0x8, + TYPE_RED = 1 << 3, + + // Transitive closure of the reduction dependences (& the reverse) + TYPE_TC_RED = 1 << 4, }; typedef std::map<ScopStmt *, isl_map *> StatementToIslMapTy; @@ -114,6 +121,9 @@ private: /// @brief The map of reduction dependences isl_union_map *RED = nullptr; + /// @brief The (reverse) transitive closure of reduction dependences + isl_union_map *TC_RED = nullptr; + /// @brief Collect information about the SCoP. void collectInfo(Scop &S, isl_union_map **Read, isl_union_map **Write, isl_union_map **MayWrite, isl_union_map **AccessSchedule, diff --git a/polly/lib/Analysis/Dependences.cpp b/polly/lib/Analysis/Dependences.cpp index c4e598141ff..2a6b16d0ae7 100644 --- a/polly/lib/Analysis/Dependences.cpp +++ b/polly/lib/Analysis/Dependences.cpp @@ -124,6 +124,15 @@ void Dependences::collectInfo(Scop &S, isl_union_map **Read, } } +/// @brief Fix all dimensions of @p Zero to 0 and add it to @p user +static int fixSetToZero(__isl_take isl_set *Zero, void *user) { + isl_union_set **User = (isl_union_set **)user; + for (unsigned i = 0; i < isl_set_dim(Zero, isl_dim_set); i++) + Zero = isl_set_fix_si(Zero, isl_dim_set, i, 0); + *User = isl_union_set_add_set(*User, Zero); + return 0; +} + /// @brief Compute the privatization dependences for a given dependency @p Map /// /// Privatization dependences are widened original dependences which originate /// S1[i0] 
-> S2[] : i0 >= 0 and i0 <= 1023} /// RED: /// { S1[i0] -> S1[1 + i0] : i0 >= 0 and i0 <= 1022 } +/// +/// Note: This function also computes the (reverse) transitive closure of the +/// reduction dependences. void Dependences::addPrivatizationDependences() { - isl_union_map *PrivRAW, *PrivWAW, *PrivWAR, *TransClosure; - - // The transitive closure might be over approximated but we only use it to - // compute the privatization dependences. Thus, overapproximation will lead - // "only" to more conservative privatization dependences. - // FIXME: Take precautions to ensure only forward dependences are created. - TransClosure = isl_union_map_transitive_closure(isl_union_map_copy(RED), 0); + isl_union_map *PrivRAW, *PrivWAW, *PrivWAR; + + // The transitive closure might be over approximated, thus could lead to + // dependency cycles in the privatization dependences. To make sure this + // will not happen we remove all negative dependences after we computed + // the transitive closure. + TC_RED = isl_union_map_transitive_closure(isl_union_map_copy(RED), 0); + + // FIXME: Apply the current schedule instead of assuming the identity schedule + // here. The current approach is only valid as long as we compute the + // dependences only with the initial (identity schedule). Any other + // schedule could change "the direction of the backward dependences" we + // want to eliminate here. 
+ isl_union_set *UDeltas = isl_union_map_deltas(isl_union_map_copy(TC_RED)); + isl_union_set *Universe = isl_union_set_universe(isl_union_set_copy(UDeltas)); + isl_union_set *Zero = isl_union_set_empty(isl_union_set_get_space(Universe)); + isl_union_set_foreach_set(Universe, fixSetToZero, &Zero); + isl_union_map *NonPositive = isl_union_set_lex_le_union_set(UDeltas, Zero); + + TC_RED = isl_union_map_subtract(TC_RED, NonPositive); + + TC_RED = isl_union_map_union( + TC_RED, isl_union_map_reverse(isl_union_map_copy(TC_RED))); + TC_RED = isl_union_map_coalesce(TC_RED); isl_union_map **Maps[] = {&RAW, &WAW, &WAR}; isl_union_map **PrivMaps[] = {&PrivRAW, &PrivWAW, &PrivWAR}; @@ -179,15 +208,15 @@ void Dependences::addPrivatizationDependences() { isl_union_map **Map = Maps[u], **PrivMap = PrivMaps[u]; *PrivMap = isl_union_map_apply_range(isl_union_map_copy(*Map), - isl_union_map_copy(TransClosure)); + isl_union_map_copy(TC_RED)); *PrivMap = isl_union_map_union( - *PrivMap, isl_union_map_apply_range(isl_union_map_copy(TransClosure), + *PrivMap, isl_union_map_apply_range(isl_union_map_copy(TC_RED), isl_union_map_copy(*Map))); *Map = isl_union_map_union(*Map, *PrivMap); } - isl_union_map_free(TransClosure); + isl_union_set_free(Universe); } void Dependences::calculateDependences(Scop &S) { @@ -330,6 +359,7 @@ void Dependences::calculateDependences(Scop &S) { WAW = isl_union_map_zip(WAW); WAR = isl_union_map_zip(WAR); RED = isl_union_map_zip(RED); + TC_RED = isl_union_map_zip(TC_RED); DEBUG(dbgs() << "Zipped Dependences:\n"; printScop(dbgs()); dbgs() << "\n"); @@ -337,6 +367,7 @@ void Dependences::calculateDependences(Scop &S) { WAW = isl_union_set_unwrap(isl_union_map_domain(WAW)); WAR = isl_union_set_unwrap(isl_union_map_domain(WAR)); RED = isl_union_set_unwrap(isl_union_map_domain(RED)); + TC_RED = isl_union_set_unwrap(isl_union_map_domain(TC_RED)); DEBUG(dbgs() << "Unwrapped Dependences:\n"; printScop(dbgs()); dbgs() << "\n"); @@ -349,6 +380,7 @@ void 
Dependences::calculateDependences(Scop &S) { WAW = isl_union_map_coalesce(WAW); WAR = isl_union_map_coalesce(WAR); RED = isl_union_map_coalesce(RED); + TC_RED = isl_union_map_coalesce(TC_RED); DEBUG(printScop(dbgs())); } @@ -495,6 +527,8 @@ void Dependences::printScop(raw_ostream &OS) const { printDependencyMap(OS, WAW); OS << "\tReduction dependences:\n\t\t"; printDependencyMap(OS, RED); + OS << "\tTransitive closure of reduction dependences:\n\t\t"; + printDependencyMap(OS, TC_RED); } void Dependences::releaseMemory() { @@ -502,8 +536,9 @@ void Dependences::releaseMemory() { isl_union_map_free(WAR); isl_union_map_free(WAW); isl_union_map_free(RED); + isl_union_map_free(TC_RED); - RED = RAW = WAR = WAW = nullptr; + RED = RAW = WAR = WAW = TC_RED = nullptr; } isl_union_map *Dependences::getDependences(int Kinds) { @@ -523,6 +558,9 @@ isl_union_map *Dependences::getDependences(int Kinds) { if (Kinds & TYPE_RED) Deps = isl_union_map_union(Deps, isl_union_map_copy(RED)); + if (Kinds & TYPE_TC_RED) + Deps = isl_union_map_union(Deps, isl_union_map_copy(TC_RED)); + Deps = isl_union_map_coalesce(Deps); Deps = isl_union_map_detect_equalities(Deps); return Deps; diff --git a/polly/lib/CodeGen/IslAst.cpp b/polly/lib/CodeGen/IslAst.cpp index da4ef7b7056..52c6bc71b5d 100644 --- a/polly/lib/CodeGen/IslAst.cpp +++ b/polly/lib/CodeGen/IslAst.cpp @@ -91,11 +91,15 @@ printParallelFor(__isl_keep isl_ast_node *Node, __isl_take isl_printer *Printer, if (Info->IsInnermostParallel) { Printer = isl_printer_start_line(Printer); Printer = isl_printer_print_str(Printer, "#pragma simd"); + if (Info->IsReductionParallel) + Printer = isl_printer_print_str(Printer, " reduction"); Printer = isl_printer_end_line(Printer); } if (Info->IsOutermostParallel) { Printer = isl_printer_start_line(Printer); Printer = isl_printer_print_str(Printer, "#pragma omp parallel for"); + if (Info->IsReductionParallel) + Printer = isl_printer_print_str(Printer, " reduction"); Printer = isl_printer_end_line(Printer); 
} } @@ -124,6 +128,7 @@ static struct IslAstUser *allocateIslAstUser() { NodeInfo->Context = 0; NodeInfo->IsOutermostParallel = 0; NodeInfo->IsInnermostParallel = 0; + NodeInfo->IsReductionParallel = false; return NodeInfo; } @@ -148,25 +153,17 @@ static void freeIslAstUser(void *Ptr) { // dimension if it is a subset of a map with equal values for the current // dimension. static bool astScheduleDimIsParallel(__isl_keep isl_ast_build *Build, - Dependences *D) { - isl_union_map *Schedule, *Deps; + __isl_take isl_union_map *Deps) { + isl_union_map *Schedule; isl_map *ScheduleDeps, *Test; isl_space *ScheduleSpace; unsigned Dimension, IsParallel; - if (!D->hasValidDependences()) { - return false; - } - Schedule = isl_ast_build_get_schedule(Build); ScheduleSpace = isl_ast_build_get_schedule_space(Build); Dimension = isl_space_dim(ScheduleSpace, isl_dim_out) - 1; - // FIXME: We can remove ignore reduction dependences in case we privatize the - // memory locations the reduction statements reduce into. - Deps = D->getDependences(Dependences::TYPE_RAW | Dependences::TYPE_WAW | - Dependences::TYPE_WAR | Dependences::TYPE_RED); Deps = isl_union_map_apply_range(Deps, isl_union_map_copy(Schedule)); Deps = isl_union_map_apply_domain(Deps, Schedule); @@ -192,6 +189,35 @@ static bool astScheduleDimIsParallel(__isl_keep isl_ast_build *Build, return IsParallel; } +/// @brief Check if the current scheduling dimension is parallel +/// +/// In case the dimension is parallel we also check if any reduction +/// dependences are broken when we exploit this parallelism. If so, +/// @p IsReductionParallel will be set to true. The reduction dependences we use +/// to check are actually the union of the transitive closure of the initial +/// reduction dependences together with their reversal. 
Even though these +/// dependences connect all iterations with each other (thus they are cyclic) +/// we can perform the parallelism check as we are only interested in a zero +/// (or non-zero) dependence distance on the dimension in question. +static bool astScheduleDimIsParallel(__isl_keep isl_ast_build *Build, + Dependences *D, + bool &IsReductionParallel) { + if (!D->hasValidDependences()) + return false; + + isl_union_map *Deps = D->getDependences( + Dependences::TYPE_RAW | Dependences::TYPE_WAW | Dependences::TYPE_WAR); + if (!astScheduleDimIsParallel(Build, Deps)) + return false; + + isl_union_map *RedDeps = + D->getDependences(Dependences::TYPE_TC_RED); + if (!astScheduleDimIsParallel(Build, RedDeps)) + IsReductionParallel = true; + + return true; +} + // Mark a for node openmp parallel, if it is the outermost parallel for node. static void markOpenmpParallel(__isl_keep isl_ast_build *Build, struct AstBuildUserInfo *BuildInfo, @@ -199,7 +225,8 @@ static void markOpenmpParallel(__isl_keep isl_ast_build *Build, if (BuildInfo->InParallelFor) return; - if (astScheduleDimIsParallel(Build, BuildInfo->Deps)) { + if (astScheduleDimIsParallel(Build, BuildInfo->Deps, + NodeInfo->IsReductionParallel)) { BuildInfo->InParallelFor = 1; NodeInfo->IsOutermostParallel = 1; } @@ -284,7 +311,8 @@ astBuildAfterFor(__isl_take isl_ast_node *Node, __isl_keep isl_ast_build *Build, if (Info->IsOutermostParallel) BuildInfo->InParallelFor = 0; if (!containsLoops(isl_ast_node_for_get_body(Node))) - if (astScheduleDimIsParallel(Build, BuildInfo->Deps)) + if (astScheduleDimIsParallel(Build, BuildInfo->Deps, + Info->IsReductionParallel)) Info->IsInnermostParallel = 1; if (!Info->Context) Info->Context = isl_ast_build_copy(Build); diff --git a/polly/test/Dependences/reduction_dependences_equal_non_reduction_dependences.ll b/polly/test/Dependences/reduction_dependences_equal_non_reduction_dependences.ll new file mode 100644 index 00000000000..ef798805c9d --- /dev/null +++ 
b/polly/test/Dependences/reduction_dependences_equal_non_reduction_dependences.ll @@ -0,0 +1,62 @@ +; RUN: opt %loadPolly -basicaa -polly-dependences -analyze < %s | FileCheck %s +; +; This loopnest contains a reduction which imposes the same dependences as the +; accesses to the array A. We need to ensure we keep the dependences of A. +; +; CHECK: RAW dependences: +; CHECK: { Stmt_for_body[i0] -> Stmt_for_body[1 + i0] : i0 >= 0 and i0 <= 1022 } +; CHECK: WAR dependences: +; CHECK: { } +; CHECK: WAW dependences: +; CHECK: { Stmt_for_body[i0] -> Stmt_for_body[1 + i0] : i0 >= 0 and i0 <= 1022 } +; CHECK: Reduction dependences: +; CHECK: { Stmt_for_body[i0] -> Stmt_for_body[1 + i0] : i0 <= 1022 and i0 >= 0 } +; +; +; void AandSum(int *restrict sum, int *restrict A) { +; for (int i = 0; i < 1024; i++) { +; A[i] = A[i] + A[i - 1]; +; A[i - 1] = A[i] + A[i - 2]; +; *sum += i; +; } +; } +; +target datalayout = "e-m:e-p:32:32-i64:64-v128:64:128-n32-S64" + +define void @AandSum(i32* noalias %sum, i32* noalias %A) { +entry: + br label %for.cond + +for.cond: ; preds = %for.inc, %entry + %i.0 = phi i32 [ 0, %entry ], [ %inc, %for.inc ] + %exitcond = icmp ne i32 %i.0, 1024 + br i1 %exitcond, label %for.body, label %for.end + +for.body: ; preds = %for.cond + %arrayidx = getelementptr inbounds i32* %A, i32 %i.0 + %tmp = load i32* %arrayidx, align 4 + %sub = add nsw i32 %i.0, -1 + %arrayidx1 = getelementptr inbounds i32* %A, i32 %sub + %tmp1 = load i32* %arrayidx1, align 4 + %add = add nsw i32 %tmp, %tmp1 + %arrayidx2 = getelementptr inbounds i32* %A, i32 %i.0 + store i32 %add, i32* %arrayidx2, align 4 + %sub4 = add nsw i32 %i.0, -2 + %arrayidx5 = getelementptr inbounds i32* %A, i32 %sub4 + %tmp2 = load i32* %arrayidx5, align 4 + %add6 = add nsw i32 %add, %tmp2 + %sub7 = add nsw i32 %i.0, -1 + %arrayidx8 = getelementptr inbounds i32* %A, i32 %sub7 + store i32 %add6, i32* %arrayidx8, align 4 + %tmp3 = load i32* %sum, align 4 + %add9 = add nsw i32 %tmp3, %i.0 + store i32 %add9, i32* 
%sum, align 4 + br label %for.inc + +for.inc: ; preds = %for.body + %inc = add nsw i32 %i.0, 1 + br label %for.cond + +for.end: ; preds = %for.cond + ret void +} diff --git a/polly/test/Dependences/reduction_mixed_reduction_and_non_reduction_dependences.ll b/polly/test/Dependences/reduction_mixed_reduction_and_non_reduction_dependences.ll index 248d7b8c361..b114fe6fa72 100644 --- a/polly/test/Dependences/reduction_mixed_reduction_and_non_reduction_dependences.ll +++ b/polly/test/Dependences/reduction_mixed_reduction_and_non_reduction_dependences.ll @@ -9,7 +9,7 @@ ; CHECK-DAG: Stmt_for_body3[i0, i1] -> Stmt_for_body3[1 + i0, -1 + i1] : i0 <= 1022 and i0 >= 0 and i1 <= 511 and i1 >= 2 ; CHECK-DAG: Stmt_for_body3[i0, 2] -> Stmt_for_body3[2 + i0, 0] : i0 <= 1021 and i0 >= 0 ; CHECK: Reduction dependences: -; CHECK: { Stmt_for_body3[i0, 1] -> Stmt_for_body3[1 + i0, 0] : i0 >= 0 and i0 <= 1022 } +; CHECK: { Stmt_for_body3[i0, 1] -> Stmt_for_body3[1 + i0, 0] : i0 <= 1022 and i0 >= 0 } ; ; void f(int *sum) { ; for (int i = 0; i < 1024; i++) diff --git a/polly/test/Isl/Ast/OpenMP/nested_loop_both_parallel_parametric.ll b/polly/test/Isl/Ast/OpenMP/nested_loop_both_parallel_parametric.ll index 39b9108de1f..23f4c2418e0 100644 --- a/polly/test/Isl/Ast/OpenMP/nested_loop_both_parallel_parametric.ll +++ b/polly/test/Isl/Ast/OpenMP/nested_loop_both_parallel_parametric.ll @@ -53,7 +53,6 @@ ret: ; Note that we do not delinearize this access function because it is considered ; to already be affine: {{0,+,4}<%loop.i>,+,4096}<%loop.j>. 
-; ; CHECK: for (int c1 = 0; c1 < n; c1 += 1) ; CHECK: #pragma simd ; CHECK: #pragma omp parallel for diff --git a/polly/test/Isl/Ast/reduction_dependences_equal_non_reduction_dependences.ll b/polly/test/Isl/Ast/reduction_dependences_equal_non_reduction_dependences.ll new file mode 100644 index 00000000000..b6527fb05b3 --- /dev/null +++ b/polly/test/Isl/Ast/reduction_dependences_equal_non_reduction_dependences.ll @@ -0,0 +1,56 @@ +; RUN: opt %loadPolly -basicaa -polly-ast -polly-ast-detect-parallel -analyze < %s | FileCheck %s +; +; This loopnest contains a reduction which imposes the same dependences as the +; accesses to the array A. We need to ensure we do __not__ parallelize anything +; here. +; +; CHECK-NOT: pragma +; CHECK-NOT: reduction +; +; void AandSum(int *restrict sum, int *restrict A) { +; for (int i = 0; i < 1024; i++) { +; A[i] = A[i] + A[i - 1]; +; A[i - 1] = A[i] + A[i - 2]; +; *sum += i; +; } +; } +; +target datalayout = "e-m:e-p:32:32-i64:64-v128:64:128-n32-S64" + +define void @AandSum(i32* noalias %sum, i32* noalias %A) { +entry: + br label %for.cond + +for.cond: ; preds = %for.inc, %entry + %i.0 = phi i32 [ 0, %entry ], [ %inc, %for.inc ] + %exitcond = icmp ne i32 %i.0, 1024 + br i1 %exitcond, label %for.body, label %for.end + +for.body: ; preds = %for.cond + %arrayidx = getelementptr inbounds i32* %A, i32 %i.0 + %tmp = load i32* %arrayidx, align 4 + %sub = add nsw i32 %i.0, -1 + %arrayidx1 = getelementptr inbounds i32* %A, i32 %sub + %tmp1 = load i32* %arrayidx1, align 4 + %add = add nsw i32 %tmp, %tmp1 + %arrayidx2 = getelementptr inbounds i32* %A, i32 %i.0 + store i32 %add, i32* %arrayidx2, align 4 + %sub4 = add nsw i32 %i.0, -2 + %arrayidx5 = getelementptr inbounds i32* %A, i32 %sub4 + %tmp2 = load i32* %arrayidx5, align 4 + %add6 = add nsw i32 %add, %tmp2 + %sub7 = add nsw i32 %i.0, -1 + %arrayidx8 = getelementptr inbounds i32* %A, i32 %sub7 + store i32 %add6, i32* %arrayidx8, align 4 + %tmp3 = load i32* %sum, align 4 + %add9 = add nsw i32 
%tmp3, %i.0 + store i32 %add9, i32* %sum, align 4 + br label %for.inc + +for.inc: ; preds = %for.body + %inc = add nsw i32 %i.0, 1 + br label %for.cond + +for.end: ; preds = %for.cond + ret void +} diff --git a/polly/test/Isl/Ast/reduction_in_one_dimension.ll b/polly/test/Isl/Ast/reduction_in_one_dimension.ll new file mode 100644 index 00000000000..24bbd9f8d14 --- /dev/null +++ b/polly/test/Isl/Ast/reduction_in_one_dimension.ll @@ -0,0 +1,57 @@ +; RUN: opt %loadPolly -polly-ast -polly-ast-detect-parallel -analyze < %s | FileCheck %s +; +; Verify that we won't privatize anything in the outer dimension +; +; CHECK: #pragma omp parallel for +; CHECK: for (int c1 = 0; c1 < 2 * n; c1 += 1) +; CHECK: #pragma simd reduction +; CHECK: for (int c3 = 0; c3 <= 1023; c3 += 1) +; CHECK: Stmt_for_body3(c1, c3); +; +; void foo(int *A, long n) { +; for (long i = 0; i < 2 * n; i++) +; for (long j = 0; j < 1024; j++) +; A[i] += i; +; } +; +target datalayout = "e-m:e-p:32:32-i64:64-v128:64:128-n32-S64" + +define void @foo(i32* %A, i32 %n) { +entry: + br label %for.cond + +for.cond: ; preds = %for.inc4, %entry + %i.0 = phi i32 [ 0, %entry ], [ %inc5, %for.inc4 ] + %mul = shl nsw i32 %n, 1 + %cmp = icmp slt i32 %i.0, %mul + br i1 %cmp, label %for.body, label %for.end6 + +for.body: ; preds = %for.cond + br label %for.cond1 + +for.cond1: ; preds = %for.inc, %for.body + %j.0 = phi i32 [ 0, %for.body ], [ %inc, %for.inc ] + %exitcond = icmp ne i32 %j.0, 1024 + br i1 %exitcond, label %for.body3, label %for.end + +for.body3: ; preds = %for.cond1 + %arrayidx = getelementptr inbounds i32* %A, i32 %i.0 + %tmp = load i32* %arrayidx, align 4 + %add = add nsw i32 %tmp, %i.0 + store i32 %add, i32* %arrayidx, align 4 + br label %for.inc + +for.inc: ; preds = %for.body3 + %inc = add nsw i32 %j.0, 1 + br label %for.cond1 + +for.end: ; preds = %for.cond1 + br label %for.inc4 + +for.inc4: ; preds = %for.end + %inc5 = add nsw i32 %i.0, 1 + br label %for.cond + +for.end6: ; preds = %for.cond + ret void +} 
diff --git a/polly/test/Isl/Ast/reduction_loop_reversal.ll b/polly/test/Isl/Ast/reduction_loop_reversal.ll new file mode 100644 index 00000000000..25ee3d79634 --- /dev/null +++ b/polly/test/Isl/Ast/reduction_loop_reversal.ll @@ -0,0 +1,66 @@ +; RUN: opt %loadPolly -polly-import-jscop-dir=%S -polly-import-jscop -polly-ast -polly-ast-detect-parallel -analyze < %s | FileCheck %s +; +; CHECK-NOT: #pragma simd{{\s*$}} +; CHECK: #pragma simd reduction +; CHECK: Stmt_S0(n - c1) +; CHECK: #pragma simd{{\s*$}} +; CHECK: Stmt_S1(n - c1) +; +; void rlr(int *A, long n) { +; for (long i = 0; i < 2 * n; i++) +; S0: A[0] += i; +; for (long i = 0; i < 2 * n; i++) +; S1: A[i + 1] = 1; +; } +; +target datalayout = "e-m:e-p:32:32-i64:64-v128:64:128-n32-S64" + +define void @rlr(i32* %A, i32 %n) { +entry: + br label %for.cond + +for.cond: ; preds = %for.inc, %entry + %i.0 = phi i32 [ 0, %entry ], [ %inc, %for.inc ] + %mul = shl nsw i32 %n, 1 + %cmp = icmp slt i32 %i.0, %mul + br i1 %cmp, label %for.body, label %for.end + +for.body: ; preds = %for.cond + br label %S0 + +S0: ; preds = %for.body + %tmp = load i32* %A, align 4 + %add = add nsw i32 %tmp, %i.0 + store i32 %add, i32* %A, align 4 + br label %for.inc + +for.inc: ; preds = %S0 + %inc = add nsw i32 %i.0, 1 + br label %for.cond + +for.end: ; preds = %for.cond + br label %for.cond2 + +for.cond2: ; preds = %for.inc8, %for.end + %i1.0 = phi i32 [ 0, %for.end ], [ %inc9, %for.inc8 ] + %mul3 = shl nsw i32 %n, 1 + %cmp4 = icmp slt i32 %i1.0, %mul3 + br i1 %cmp4, label %for.body5, label %for.end10 + +for.body5: ; preds = %for.cond2 + br label %S1 + +S1: ; preds = %for.body5 + %add6 = add nsw i32 %i1.0, 1 + %arrayidx7 = getelementptr inbounds i32* %A, i32 %add6 + store i32 1, i32* %arrayidx7, align 4 + br label %for.inc8 + +for.inc8: ; preds = %S1 + %inc9 = add nsw i32 %i1.0, 1 + br label %for.cond2 + +for.end10: ; preds = %for.cond2 + ret void +} + diff --git a/polly/test/Isl/Ast/reduction_modulo_and_loop_reversal_schedule.ll 
b/polly/test/Isl/Ast/reduction_modulo_and_loop_reversal_schedule.ll new file mode 100644 index 00000000000..0285dbbec65 --- /dev/null +++ b/polly/test/Isl/Ast/reduction_modulo_and_loop_reversal_schedule.ll @@ -0,0 +1,69 @@ +; RUN: opt %loadPolly -polly-import-jscop-dir=%S -polly-import-jscop -polly-ast -polly-ast-detect-parallel -analyze < %s | FileCheck %s +; +; CHECK-NOT: #pragma simd{{\s*$}} +; CHECK: #pragma simd reduction +; CHECK: Stmt_S0(2 * n - c1) +; CHECK: #pragma simd{{\s*$}} +; CHECK: Stmt_S1 +; CHECK: #pragma simd reduction +; CHECK: Stmt_S0(2 * n - c1) +; CHECK-NOT: #pragma simd{{\s*$}} +; +; void rmalrs(int *A, long n) { +; for (long i = 0; i < 2 * n; i++) +; S0: A[0] += i; +; for (long i = 0; i < 2 * n; i++) +; S1: A[i + 1] = 1; +; } +; +target datalayout = "e-m:e-p:32:32-i64:64-v128:64:128-n32-S64" + +define void @rmalrs(i32* %A, i32 %n) { +entry: + br label %for.cond + +for.cond: ; preds = %for.inc, %entry + %i.0 = phi i32 [ 0, %entry ], [ %inc, %for.inc ] + %mul = shl nsw i32 %n, 1 + %cmp = icmp slt i32 %i.0, %mul + br i1 %cmp, label %for.body, label %for.end + +for.body: ; preds = %for.cond + br label %S0 + +S0: ; preds = %for.body + %tmp = load i32* %A, align 4 + %add = add nsw i32 %tmp, %i.0 + store i32 %add, i32* %A, align 4 + br label %for.inc + +for.inc: ; preds = %S0 + %inc = add nsw i32 %i.0, 1 + br label %for.cond + +for.end: ; preds = %for.cond + br label %for.cond2 + +for.cond2: ; preds = %for.inc8, %for.end + %i1.0 = phi i32 [ 0, %for.end ], [ %inc9, %for.inc8 ] + %mul3 = shl nsw i32 %n, 1 + %cmp4 = icmp slt i32 %i1.0, %mul3 + br i1 %cmp4, label %for.body5, label %for.end10 + +for.body5: ; preds = %for.cond2 + br label %S1 + +S1: ; preds = %for.body5 + %add6 = add nsw i32 %i1.0, 1 + %arrayidx7 = getelementptr inbounds i32* %A, i32 %add6 + store i32 1, i32* %arrayidx7, align 4 + br label %for.inc8 + +for.inc8: ; preds = %S1 + %inc9 = add nsw i32 %i1.0, 1 + br label %for.cond2 + +for.end10: ; preds = %for.cond2 + ret void +} + diff 
--git a/polly/test/Isl/Ast/reduction_modulo_and_loop_reversal_schedule_2.ll b/polly/test/Isl/Ast/reduction_modulo_and_loop_reversal_schedule_2.ll new file mode 100644 index 00000000000..3ac8a29dc71 --- /dev/null +++ b/polly/test/Isl/Ast/reduction_modulo_and_loop_reversal_schedule_2.ll @@ -0,0 +1,76 @@ +; RUN: opt %loadPolly -polly-import-jscop-dir=%S -polly-import-jscop -polly-ast -polly-ast-detect-parallel -analyze < %s | FileCheck %s +; +; CHECK: #pragma omp parallel for reduction +; CHECK: for (int c0 = 0; c0 <= 2; c0 += 1) { +; CHECK: if (c0 == 2) { +; CHECK: #pragma simd reduction +; CHECK: for (int c1 = 1; c1 < 2 * n; c1 += 2) +; CHECK: Stmt_S0(c1); +; CHECK: } else if (c0 == 1) { +; CHECK: #pragma simd +; CHECK: for (int c1 = 0; c1 < 2 * n; c1 += 1) +; CHECK: Stmt_S1(c1); +; CHECK: } else +; CHECK: #pragma simd reduction +; CHECK: for (int c1 = -2 * n + 2; c1 <= 0; c1 += 2) +; CHECK: Stmt_S0(-c1); +; CHECK: } +; +; void rmalrs2(int *A, long n) { +; for (long i = 0; i < 2 * n; i++) +; S0: A[0] += i; +; for (long i = 0; i < 2 * n; i++) +; S1: A[i + 1] = 1; +; } +; +target datalayout = "e-m:e-p:32:32-i64:64-v128:64:128-n32-S64" + +define void @rmalrs2(i32* %A, i32 %n) { +entry: + br label %for.cond + +for.cond: ; preds = %for.inc, %entry + %i.0 = phi i32 [ 0, %entry ], [ %inc, %for.inc ] + %mul = shl nsw i32 %n, 1 + %cmp = icmp slt i32 %i.0, %mul + br i1 %cmp, label %for.body, label %for.end + +for.body: ; preds = %for.cond + br label %S0 + +S0: ; preds = %for.body + %tmp = load i32* %A, align 4 + %add = add nsw i32 %tmp, %i.0 + store i32 %add, i32* %A, align 4 + br label %for.inc + +for.inc: ; preds = %S0 + %inc = add nsw i32 %i.0, 1 + br label %for.cond + +for.end: ; preds = %for.cond + br label %for.cond2 + +for.cond2: ; preds = %for.inc8, %for.end + %i1.0 = phi i32 [ 0, %for.end ], [ %inc9, %for.inc8 ] + %mul3 = shl nsw i32 %n, 1 + %cmp4 = icmp slt i32 %i1.0, %mul3 + br i1 %cmp4, label %for.body5, label %for.end10 + +for.body5: ; preds = %for.cond2 + br 
label %S1 + +S1: ; preds = %for.body5 + %add6 = add nsw i32 %i1.0, 1 + %arrayidx7 = getelementptr inbounds i32* %A, i32 %add6 + store i32 1, i32* %arrayidx7, align 4 + br label %for.inc8 + +for.inc8: ; preds = %S1 + %inc9 = add nsw i32 %i1.0, 1 + br label %for.cond2 + +for.end10: ; preds = %for.cond2 + ret void +} + diff --git a/polly/test/Isl/Ast/reduction_modulo_schedule.ll b/polly/test/Isl/Ast/reduction_modulo_schedule.ll new file mode 100644 index 00000000000..f4dfef63981 --- /dev/null +++ b/polly/test/Isl/Ast/reduction_modulo_schedule.ll @@ -0,0 +1,69 @@ +; RUN: opt %loadPolly -polly-import-jscop-dir=%S -polly-import-jscop -polly-ast -polly-ast-detect-parallel -analyze < %s | FileCheck %s +; +; CHECK-NOT: #pragma simd{{\s*$}} +; CHECK: #pragma simd reduction +; CHECK: Stmt_S0 +; CHECK: #pragma simd{{\s*$}} +; CHECK: Stmt_S1 +; CHECK: #pragma simd reduction +; CHECK: Stmt_S0 +; CHECK-NOT: #pragma simd{{\s*$}} +; +; void rms(int *A, long n) { +; for (long i = 0; i < 2 * n; i++) +; S0: A[0] += i; +; for (long i = 0; i < 2 * n; i++) +; S1: A[i + 1] = 1; +; } +; +target datalayout = "e-m:e-p:32:32-i64:64-v128:64:128-n32-S64" + +define void @rms(i32* %A, i32 %n) { +entry: + br label %for.cond + +for.cond: ; preds = %for.inc, %entry + %i.0 = phi i32 [ 0, %entry ], [ %inc, %for.inc ] + %mul = shl nsw i32 %n, 1 + %cmp = icmp slt i32 %i.0, %mul + br i1 %cmp, label %for.body, label %for.end + +for.body: ; preds = %for.cond + br label %S0 + +S0: ; preds = %for.body + %tmp = load i32* %A, align 4 + %add = add nsw i32 %tmp, %i.0 + store i32 %add, i32* %A, align 4 + br label %for.inc + +for.inc: ; preds = %S0 + %inc = add nsw i32 %i.0, 1 + br label %for.cond + +for.end: ; preds = %for.cond + br label %for.cond2 + +for.cond2: ; preds = %for.inc8, %for.end + %i1.0 = phi i32 [ 0, %for.end ], [ %inc9, %for.inc8 ] + %mul3 = shl nsw i32 %n, 1 + %cmp4 = icmp slt i32 %i1.0, %mul3 + br i1 %cmp4, label %for.body5, label %for.end10 + +for.body5: ; preds = %for.cond2 + br label %S1 + 
+S1: ; preds = %for.body5 + %add6 = add nsw i32 %i1.0, 1 + %arrayidx7 = getelementptr inbounds i32* %A, i32 %add6 + store i32 1, i32* %arrayidx7, align 4 + br label %for.inc8 + +for.inc8: ; preds = %S1 + %inc9 = add nsw i32 %i1.0, 1 + br label %for.cond2 + +for.end10: ; preds = %for.cond2 + ret void +} + diff --git a/polly/test/Isl/Ast/reduction_modulo_schedule_multiple_dimensions.ll b/polly/test/Isl/Ast/reduction_modulo_schedule_multiple_dimensions.ll new file mode 100644 index 00000000000..af7f79990c9 --- /dev/null +++ b/polly/test/Isl/Ast/reduction_modulo_schedule_multiple_dimensions.ll @@ -0,0 +1,63 @@ +; RUN: opt %loadPolly -polly-import-jscop-dir=%S -polly-import-jscop -polly-ast -polly-ast-detect-parallel -analyze < %s | FileCheck %s +; +; CHECK: #pragma omp parallel for +; CHECK: for (int c0 = 0; c0 <= 1; c0 += 1) { +; CHECK: if (c0 == 1) { +; CHECK: for (int c1 = 1; c1 < 2 * n; c1 += 2) +; CHECK: #pragma simd reduction +; CHECK: for (int c3 = 0; c3 <= 1023; c3 += 1) +; CHECK: Stmt_for_body3(c1, c3); +; CHECK: } else +; CHECK: for (int c1 = 0; c1 < 2 * n - 1; c1 += 2) +; CHECK: #pragma simd reduction +; CHECK: for (int c3 = 0; c3 <= 1023; c3 += 1) +; CHECK: Stmt_for_body3(c1, c3); +; CHECK: } +; +; void rmsmd(int *A, long n) { +; for (long i = 0; i < 2 * n; i++) +; for (long j = 0; j < 1024; j++) +; A[i] += i; +; } +; +target datalayout = "e-m:e-p:32:32-i64:64-v128:64:128-n32-S64" + +define void @rmsmd(i32* %A, i32 %n) { +entry: + br label %for.cond + +for.cond: ; preds = %for.inc4, %entry + %i.0 = phi i32 [ 0, %entry ], [ %inc5, %for.inc4 ] + %mul = shl nsw i32 %n, 1 + %cmp = icmp slt i32 %i.0, %mul + br i1 %cmp, label %for.body, label %for.end6 + +for.body: ; preds = %for.cond + br label %for.cond1 + +for.cond1: ; preds = %for.inc, %for.body + %j.0 = phi i32 [ 0, %for.body ], [ %inc, %for.inc ] + %exitcond = icmp ne i32 %j.0, 1024 + br i1 %exitcond, label %for.body3, label %for.end + +for.body3: ; preds = %for.cond1 + %arrayidx = getelementptr inbounds 
i32* %A, i32 %i.0 + %tmp = load i32* %arrayidx, align 4 + %add = add nsw i32 %tmp, %i.0 + store i32 %add, i32* %arrayidx, align 4 + br label %for.inc + +for.inc: ; preds = %for.body3 + %inc = add nsw i32 %j.0, 1 + br label %for.cond1 + +for.end: ; preds = %for.cond1 + br label %for.inc4 + +for.inc4: ; preds = %for.end + %inc5 = add nsw i32 %i.0, 1 + br label %for.cond + +for.end6: ; preds = %for.cond + ret void +} diff --git a/polly/test/Isl/Ast/reduction_modulo_schedule_multiple_dimensions_2.ll b/polly/test/Isl/Ast/reduction_modulo_schedule_multiple_dimensions_2.ll new file mode 100644 index 00000000000..34677d27501 --- /dev/null +++ b/polly/test/Isl/Ast/reduction_modulo_schedule_multiple_dimensions_2.ll @@ -0,0 +1,64 @@ +; RUN: opt %loadPolly -polly-import-jscop-dir=%S -polly-import-jscop -polly-ast -polly-ast-detect-parallel -analyze < %s | FileCheck %s +; +; Verify that the outer dimension doesnt't carry reduction dependences +; +; CHECK-NOT:#pragma omp parallel for reduction +; CHECK: #pragma omp parallel for +; CHECK: for (int c1 = 0; c1 < 2 * n; c1 += 1) { +; CHECK: if (c1 % 2 == 0) { +; CHECK: #pragma simd reduction +; CHECK: for (int c3 = 0; c3 <= 1023; c3 += 1) +; CHECK: Stmt_for_body3(c1, c3); +; CHECK: } else +; CHECK: #pragma simd reduction +; CHECK: for (int c3 = 0; c3 <= 1023; c3 += 1) +; CHECK: Stmt_for_body3(c1, c3); +; CHECK: } +; +; void rmsmd2(int *A, long n) { +; for (long i = 0; i < 2 * n; i++) +; for (long j = 0; j < 1024; j++) +; A[i] += i; +; } +; +target datalayout = "e-m:e-p:32:32-i64:64-v128:64:128-n32-S64" + +define void @rmsmd2(i32* %A, i32 %n) { +entry: + br label %for.cond + +for.cond: ; preds = %for.inc4, %entry + %i.0 = phi i32 [ 0, %entry ], [ %inc5, %for.inc4 ] + %mul = shl nsw i32 %n, 1 + %cmp = icmp slt i32 %i.0, %mul + br i1 %cmp, label %for.body, label %for.end6 + +for.body: ; preds = %for.cond + br label %for.cond1 + +for.cond1: ; preds = %for.inc, %for.body + %j.0 = phi i32 [ 0, %for.body ], [ %inc, %for.inc ] + %exitcond = 
icmp ne i32 %j.0, 1024 + br i1 %exitcond, label %for.body3, label %for.end + +for.body3: ; preds = %for.cond1 + %arrayidx = getelementptr inbounds i32* %A, i32 %i.0 + %tmp = load i32* %arrayidx, align 4 + %add = add nsw i32 %tmp, %i.0 + store i32 %add, i32* %arrayidx, align 4 + br label %for.inc + +for.inc: ; preds = %for.body3 + %inc = add nsw i32 %j.0, 1 + br label %for.cond1 + +for.end: ; preds = %for.cond1 + br label %for.inc4 + +for.inc4: ; preds = %for.end + %inc5 = add nsw i32 %i.0, 1 + br label %for.cond + +for.end6: ; preds = %for.cond + ret void +} diff --git a/polly/test/Isl/Ast/reduction_modulo_schedule_multiple_dimensions_3.ll b/polly/test/Isl/Ast/reduction_modulo_schedule_multiple_dimensions_3.ll new file mode 100644 index 00000000000..0a544d2d364 --- /dev/null +++ b/polly/test/Isl/Ast/reduction_modulo_schedule_multiple_dimensions_3.ll @@ -0,0 +1,62 @@ +; RUN: opt %loadPolly -polly-import-jscop-dir=%S -polly-import-jscop -polly-ast -polly-ast-detect-parallel -analyze < %s | FileCheck %s +; +; Verify that the outer dimension doesnt't carry reduction dependences +; +; CHECK-NOT:#pragma omp parallel for reduction +; CHECK: #pragma omp parallel for +; CHECK: for (int c1 = 0; c1 < 2 * n; c1 += 1) +; CHECK: #pragma simd reduction +; CHECK: for (int c3 = 0; c3 <= 1023; c3 += 1) { +; CHECK: if (c3 % 2 == 0) { +; CHECK: Stmt_for_body3(c1, c3); +; CHECK: } else +; CHECK: Stmt_for_body3(c1, c3); +; CHECK: } +; +; void rmsmd3(int *A, long n) { +; for (long i = 0; i < 2 * n; i++) +; for (long j = 0; j < 1024; j++) +; A[i] += i; +; } +; +target datalayout = "e-m:e-p:32:32-i64:64-v128:64:128-n32-S64" + +define void @rmsmd3(i32* %A, i32 %n) { +entry: + br label %for.cond + +for.cond: ; preds = %for.inc4, %entry + %i.0 = phi i32 [ 0, %entry ], [ %inc5, %for.inc4 ] + %mul = shl nsw i32 %n, 1 + %cmp = icmp slt i32 %i.0, %mul + br i1 %cmp, label %for.body, label %for.end6 + +for.body: ; preds = %for.cond + br label %for.cond1 + +for.cond1: ; preds = %for.inc, %for.body + 
%j.0 = phi i32 [ 0, %for.body ], [ %inc, %for.inc ] + %exitcond = icmp ne i32 %j.0, 1024 + br i1 %exitcond, label %for.body3, label %for.end + +for.body3: ; preds = %for.cond1 + %arrayidx = getelementptr inbounds i32* %A, i32 %i.0 + %tmp = load i32* %arrayidx, align 4 + %add = add nsw i32 %tmp, %i.0 + store i32 %add, i32* %arrayidx, align 4 + br label %for.inc + +for.inc: ; preds = %for.body3 + %inc = add nsw i32 %j.0, 1 + br label %for.cond1 + +for.end: ; preds = %for.cond1 + br label %for.inc4 + +for.inc4: ; preds = %for.end + %inc5 = add nsw i32 %i.0, 1 + br label %for.cond + +for.end6: ; preds = %for.cond + ret void +} diff --git a/polly/test/Isl/Ast/reduction_modulo_schedule_multiple_dimensions_4.ll b/polly/test/Isl/Ast/reduction_modulo_schedule_multiple_dimensions_4.ll new file mode 100644 index 00000000000..8516207125c --- /dev/null +++ b/polly/test/Isl/Ast/reduction_modulo_schedule_multiple_dimensions_4.ll @@ -0,0 +1,62 @@ +; RUN: opt %loadPolly -polly-import-jscop-dir=%S -polly-import-jscop -polly-ast -polly-ast-detect-parallel -analyze < %s | FileCheck %s +; +; Verify that the outer dimension doesnt't carry reduction dependences +; +; CHECK-NOT:#pragma omp parallel for reduction +; CHECK: #pragma omp parallel for +; CHECK: for (int c1 = 0; c1 < 2 * n; c1 += 1) +; CHECK: #pragma simd reduction +; CHECK: for (int c3 = -1022; c3 <= 1023; c3 += 1) { +; CHECK: if ((c3 + 1022) % 2 == 0 && c3 <= 0) { +; CHECK: Stmt_for_body3(c1, -c3); +; CHECK: } else if ((c3 + 1023) % 2 == 0 && c3 >= 1) +; CHECK: Stmt_for_body3(c1, c3); +; CHECK: } +; +; void rmsmd4(int *A, long n) { +; for (long i = 0; i < 2 * n; i++) +; for (long j = 0; j < 1024; j++) +; A[i] += i; +; } +; +target datalayout = "e-m:e-p:32:32-i64:64-v128:64:128-n32-S64" + +define void @rmsmd4(i32* %A, i32 %n) { +entry: + br label %for.cond + +for.cond: ; preds = %for.inc4, %entry + %i.0 = phi i32 [ 0, %entry ], [ %inc5, %for.inc4 ] + %mul = shl nsw i32 %n, 1 + %cmp = icmp slt i32 %i.0, %mul + br i1 %cmp, label 
%for.body, label %for.end6 + +for.body: ; preds = %for.cond + br label %for.cond1 + +for.cond1: ; preds = %for.inc, %for.body + %j.0 = phi i32 [ 0, %for.body ], [ %inc, %for.inc ] + %exitcond = icmp ne i32 %j.0, 1024 + br i1 %exitcond, label %for.body3, label %for.end + +for.body3: ; preds = %for.cond1 + %arrayidx = getelementptr inbounds i32* %A, i32 %i.0 + %tmp = load i32* %arrayidx, align 4 + %add = add nsw i32 %tmp, %i.0 + store i32 %add, i32* %arrayidx, align 4 + br label %for.inc + +for.inc: ; preds = %for.body3 + %inc = add nsw i32 %j.0, 1 + br label %for.cond1 + +for.end: ; preds = %for.cond1 + br label %for.inc4 + +for.inc4: ; preds = %for.end + %inc5 = add nsw i32 %i.0, 1 + br label %for.cond + +for.end6: ; preds = %for.cond + ret void +} diff --git a/polly/test/Isl/Ast/reduction_modulo_schedule_multiple_dimensions_5.ll b/polly/test/Isl/Ast/reduction_modulo_schedule_multiple_dimensions_5.ll new file mode 100644 index 00000000000..a1e00b8eefe --- /dev/null +++ b/polly/test/Isl/Ast/reduction_modulo_schedule_multiple_dimensions_5.ll @@ -0,0 +1,65 @@ +; RUN: opt %loadPolly -polly-import-jscop-dir=%S -polly-import-jscop -polly-ast -polly-ast-detect-parallel -analyze < %s | FileCheck %s +; +; Verify that only the outer dimension needs privatization +; +; CHECK: #pragma omp parallel for reduction +; CHECK: for (int c1 = 0; c1 <= 1023; c1 += 1) { +; CHECK: if (c1 % 2 == 0) { +; CHECK-NOT: #pragma simd reduction +; CHECK: #pragma simd +; CHECK: for (int c3 = 0; c3 < 2 * n; c3 += 1) +; CHECK: Stmt_for_body3(c3, c1); +; CHECK: } else +; CHECK-NOT: #pragma simd reduction +; CHECK: #pragma simd +; CHECK: for (int c3 = -2 * n + 1; c3 <= 0; c3 += 1) +; CHECK: Stmt_for_body3(-c3, c1); +; CHECK: } +; +; void rmsmd5(int *A, long n) { +; for (long i = 0; i < 2 * n; i++) +; for (long j = 0; j < 1024; j++) +; A[i] += i; +; } +; +target datalayout = "e-m:e-p:32:32-i64:64-v128:64:128-n32-S64" + +define void @rmsmd5(i32* %A, i32 %n) { +entry: + br label %for.cond + +for.cond: ; 
preds = %for.inc4, %entry + %i.0 = phi i32 [ 0, %entry ], [ %inc5, %for.inc4 ] + %mul = shl nsw i32 %n, 1 + %cmp = icmp slt i32 %i.0, %mul + br i1 %cmp, label %for.body, label %for.end6 + +for.body: ; preds = %for.cond + br label %for.cond1 + +for.cond1: ; preds = %for.inc, %for.body + %j.0 = phi i32 [ 0, %for.body ], [ %inc, %for.inc ] + %exitcond = icmp ne i32 %j.0, 1024 + br i1 %exitcond, label %for.body3, label %for.end + +for.body3: ; preds = %for.cond1 + %arrayidx = getelementptr inbounds i32* %A, i32 %i.0 + %tmp = load i32* %arrayidx, align 4 + %add = add nsw i32 %tmp, %i.0 + store i32 %add, i32* %arrayidx, align 4 + br label %for.inc + +for.inc: ; preds = %for.body3 + %inc = add nsw i32 %j.0, 1 + br label %for.cond1 + +for.end: ; preds = %for.cond1 + br label %for.inc4 + +for.inc4: ; preds = %for.end + %inc5 = add nsw i32 %i.0, 1 + br label %for.cond + +for.end6: ; preds = %for.cond + ret void +} diff --git a/polly/test/Isl/Ast/reduction_multiple_dimensions.ll b/polly/test/Isl/Ast/reduction_multiple_dimensions.ll new file mode 100644 index 00000000000..98520dc3eed --- /dev/null +++ b/polly/test/Isl/Ast/reduction_multiple_dimensions.ll @@ -0,0 +1,72 @@ +; RUN: opt %loadPolly -polly-ast -polly-ast-detect-parallel -analyze < %s | FileCheck %s +; +; CHECK-NOT:#pragma omp parallel for reduction +; CHECK: #pragma omp parallel for +; CHECK: for (int c1 = 0; c1 <= 2047; c1 += 1) +; CHECK: for (int c3 = 0; c3 <= 1023; c3 += 1) +; CHECK: #pragma simd reduction +; CHECK: for (int c5 = 0; c5 <= 511; c5 += 1) +; CHECK: Stmt_for_body6(c1, c3, c5); +; +; void rmd(int *A) { +; for (long i = 0; i < 2048; i++) +; for (long j = 0; j < 1024; j++) +; for (long k = 0; k < 512; k++) +; A[i] += i; +; } +; +target datalayout = "e-m:e-p:32:32-i64:64-v128:64:128-n32-S64" + +define void @rmd(i32* %A) { +entry: + br label %for.cond + +for.cond: ; preds = %for.inc10, %entry + %i.0 = phi i32 [ 0, %entry ], [ %inc11, %for.inc10 ] + %exitcond2 = icmp ne i32 %i.0, 2048 + br i1 %exitcond2, 
label %for.body, label %for.end12 + +for.body: ; preds = %for.cond + br label %for.cond1 + +for.cond1: ; preds = %for.inc7, %for.body + %j.0 = phi i32 [ 0, %for.body ], [ %inc8, %for.inc7 ] + %exitcond1 = icmp ne i32 %j.0, 1024 + br i1 %exitcond1, label %for.body3, label %for.end9 + +for.body3: ; preds = %for.cond1 + br label %for.cond4 + +for.cond4: ; preds = %for.inc, %for.body3 + %k.0 = phi i32 [ 0, %for.body3 ], [ %inc, %for.inc ] + %exitcond = icmp ne i32 %k.0, 512 + br i1 %exitcond, label %for.body6, label %for.end + +for.body6: ; preds = %for.cond4 + %arrayidx = getelementptr inbounds i32* %A, i32 %i.0 + %tmp = load i32* %arrayidx, align 4 + %add = add nsw i32 %tmp, %i.0 + store i32 %add, i32* %arrayidx, align 4 + br label %for.inc + +for.inc: ; preds = %for.body6 + %inc = add nsw i32 %k.0, 1 + br label %for.cond4 + +for.end: ; preds = %for.cond4 + br label %for.inc7 + +for.inc7: ; preds = %for.end + %inc8 = add nsw i32 %j.0, 1 + br label %for.cond1 + +for.end9: ; preds = %for.cond1 + br label %for.inc10 + +for.inc10: ; preds = %for.end9 + %inc11 = add nsw i32 %i.0, 1 + br label %for.cond + +for.end12: ; preds = %for.cond + ret void +} diff --git a/polly/test/Isl/Ast/reduction_multiple_dimensions_2.ll b/polly/test/Isl/Ast/reduction_multiple_dimensions_2.ll new file mode 100644 index 00000000000..af63bc7d325 --- /dev/null +++ b/polly/test/Isl/Ast/reduction_multiple_dimensions_2.ll @@ -0,0 +1,72 @@ +; RUN: opt %loadPolly -polly-ast -polly-ast-detect-parallel -analyze < %s | FileCheck %s +; +; CHECK-NOT:#pragma omp parallel for reduction +; CHECK: #pragma omp parallel for +; CHECK: for (int c1 = 0; c1 <= 2047; c1 += 1) +; CHECK: for (int c3 = 0; c3 <= 1023; c3 += 1) +; CHECK: #pragma simd reduction +; CHECK: for (int c5 = 0; c5 <= 511; c5 += 1) +; CHECK: Stmt_for_body6(c1, c3, c5); +; +; void rmd2(int *A) { +; for (long i = 0; i < 2048; i++) +; for (long j = 0; j < 1024; j++) +; for (long k = 0; k < 512; k++) +; A[i] += i; +; } +; +target datalayout = 
"e-m:e-p:32:32-i64:64-v128:64:128-n32-S64" + +define void @rmd2(i32* %A) { +entry: + br label %for.cond + +for.cond: ; preds = %for.inc10, %entry + %i.0 = phi i32 [ 0, %entry ], [ %inc11, %for.inc10 ] + %exitcond2 = icmp ne i32 %i.0, 2048 + br i1 %exitcond2, label %for.body, label %for.end12 + +for.body: ; preds = %for.cond + br label %for.cond1 + +for.cond1: ; preds = %for.inc7, %for.body + %j.0 = phi i32 [ 0, %for.body ], [ %inc8, %for.inc7 ] + %exitcond1 = icmp ne i32 %j.0, 1024 + br i1 %exitcond1, label %for.body3, label %for.end9 + +for.body3: ; preds = %for.cond1 + br label %for.cond4 + +for.cond4: ; preds = %for.inc, %for.body3 + %k.0 = phi i32 [ 0, %for.body3 ], [ %inc, %for.inc ] + %exitcond = icmp ne i32 %k.0, 512 + br i1 %exitcond, label %for.body6, label %for.end + +for.body6: ; preds = %for.cond4 + %arrayidx = getelementptr inbounds i32* %A, i32 %i.0 + %tmp = load i32* %arrayidx, align 4 + %add = add nsw i32 %tmp, %i.0 + store i32 %add, i32* %arrayidx, align 4 + br label %for.inc + +for.inc: ; preds = %for.body6 + %inc = add nsw i32 %k.0, 1 + br label %for.cond4 + +for.end: ; preds = %for.cond4 + br label %for.inc7 + +for.inc7: ; preds = %for.end + %inc8 = add nsw i32 %j.0, 1 + br label %for.cond1 + +for.end9: ; preds = %for.cond1 + br label %for.inc10 + +for.inc10: ; preds = %for.end9 + %inc11 = add nsw i32 %i.0, 1 + br label %for.cond + +for.end12: ; preds = %for.cond + ret void +} diff --git a/polly/test/Isl/Ast/reduction_multiple_dimensions_3.ll b/polly/test/Isl/Ast/reduction_multiple_dimensions_3.ll new file mode 100644 index 00000000000..13ae1a193b9 --- /dev/null +++ b/polly/test/Isl/Ast/reduction_multiple_dimensions_3.ll @@ -0,0 +1,72 @@ +; RUN: opt %loadPolly -polly-ast -polly-ast-detect-parallel -analyze < %s | FileCheck %s +; +; CHECK-NOT:#pragma omp parallel for reduction +; CHECK: #pragma omp parallel for +; CHECK: for (int c1 = 0; c1 <= 2047; c1 += 1) +; CHECK: for (int c3 = 0; c3 <= 1023; c3 += 1) +; CHECK: #pragma simd reduction +; 
CHECK: for (int c5 = 0; c5 <= 511; c5 += 1) +; CHECK: Stmt_for_body6(c1, c3, c5); +; +; void rmd3(int *A) { +; for (long i = 0; i < 2048; i++) +; for (long j = 0; j < 1024; j++) +; for (long k = 0; k < 512; k++) +; A[i] += i; +; } +; +target datalayout = "e-m:e-p:32:32-i64:64-v128:64:128-n32-S64" + +define void @rmd3(i32* %A) { +entry: + br label %for.cond + +for.cond: ; preds = %for.inc10, %entry + %i.0 = phi i32 [ 0, %entry ], [ %inc11, %for.inc10 ] + %exitcond2 = icmp ne i32 %i.0, 2048 + br i1 %exitcond2, label %for.body, label %for.end12 + +for.body: ; preds = %for.cond + br label %for.cond1 + +for.cond1: ; preds = %for.inc7, %for.body + %j.0 = phi i32 [ 0, %for.body ], [ %inc8, %for.inc7 ] + %exitcond1 = icmp ne i32 %j.0, 1024 + br i1 %exitcond1, label %for.body3, label %for.end9 + +for.body3: ; preds = %for.cond1 + br label %for.cond4 + +for.cond4: ; preds = %for.inc, %for.body3 + %k.0 = phi i32 [ 0, %for.body3 ], [ %inc, %for.inc ] + %exitcond = icmp ne i32 %k.0, 512 + br i1 %exitcond, label %for.body6, label %for.end + +for.body6: ; preds = %for.cond4 + %arrayidx = getelementptr inbounds i32* %A, i32 %i.0 + %tmp = load i32* %arrayidx, align 4 + %add = add nsw i32 %tmp, %i.0 + store i32 %add, i32* %arrayidx, align 4 + br label %for.inc + +for.inc: ; preds = %for.body6 + %inc = add nsw i32 %k.0, 1 + br label %for.cond4 + +for.end: ; preds = %for.cond4 + br label %for.inc7 + +for.inc7: ; preds = %for.end + %inc8 = add nsw i32 %j.0, 1 + br label %for.cond1 + +for.end9: ; preds = %for.cond1 + br label %for.inc10 + +for.inc10: ; preds = %for.end9 + %inc11 = add nsw i32 %i.0, 1 + br label %for.cond + +for.end12: ; preds = %for.cond + ret void +} diff --git a/polly/test/Isl/Ast/reduction_multiple_dimensions_4.ll b/polly/test/Isl/Ast/reduction_multiple_dimensions_4.ll new file mode 100644 index 00000000000..1a8f703a1bd --- /dev/null +++ b/polly/test/Isl/Ast/reduction_multiple_dimensions_4.ll @@ -0,0 +1,72 @@ +; RUN: opt %loadPolly -polly-ast 
-polly-ast-detect-parallel -analyze < %s | FileCheck %s +; +; CHECK-NOT:#pragma omp parallel for reduction +; CHECK: #pragma omp parallel for +; CHECK: for (int c1 = 0; c1 <= 2047; c1 += 1) +; CHECK: for (int c3 = 0; c3 <= 1023; c3 += 1) +; CHECK: #pragma simd reduction +; CHECK: for (int c5 = 0; c5 <= 511; c5 += 1) +; CHECK: Stmt_for_body6(c1, c3, c5); +; +; void rmd4(int *A) { +; for (long i = 0; i < 2048; i++) +; for (long j = 0; j < 1024; j++) +; for (long k = 0; k < 512; k++) +; A[i] += i; +; } +; +target datalayout = "e-m:e-p:32:32-i64:64-v128:64:128-n32-S64" + +define void @rmd4(i32* %A) { +entry: + br label %for.cond + +for.cond: ; preds = %for.inc10, %entry + %i.0 = phi i32 [ 0, %entry ], [ %inc11, %for.inc10 ] + %exitcond2 = icmp ne i32 %i.0, 2048 + br i1 %exitcond2, label %for.body, label %for.end12 + +for.body: ; preds = %for.cond + br label %for.cond1 + +for.cond1: ; preds = %for.inc7, %for.body + %j.0 = phi i32 [ 0, %for.body ], [ %inc8, %for.inc7 ] + %exitcond1 = icmp ne i32 %j.0, 1024 + br i1 %exitcond1, label %for.body3, label %for.end9 + +for.body3: ; preds = %for.cond1 + br label %for.cond4 + +for.cond4: ; preds = %for.inc, %for.body3 + %k.0 = phi i32 [ 0, %for.body3 ], [ %inc, %for.inc ] + %exitcond = icmp ne i32 %k.0, 512 + br i1 %exitcond, label %for.body6, label %for.end + +for.body6: ; preds = %for.cond4 + %arrayidx = getelementptr inbounds i32* %A, i32 %i.0 + %tmp = load i32* %arrayidx, align 4 + %add = add nsw i32 %tmp, %i.0 + store i32 %add, i32* %arrayidx, align 4 + br label %for.inc + +for.inc: ; preds = %for.body6 + %inc = add nsw i32 %k.0, 1 + br label %for.cond4 + +for.end: ; preds = %for.cond4 + br label %for.inc7 + +for.inc7: ; preds = %for.end + %inc8 = add nsw i32 %j.0, 1 + br label %for.cond1 + +for.end9: ; preds = %for.cond1 + br label %for.inc10 + +for.inc10: ; preds = %for.end9 + %inc11 = add nsw i32 %i.0, 1 + br label %for.cond + +for.end12: ; preds = %for.cond + ret void +} diff --git 
a/polly/test/Isl/Ast/rlr___%for.cond---%for.end10.jscop b/polly/test/Isl/Ast/rlr___%for.cond---%for.end10.jscop new file mode 100644 index 00000000000..fc50d5c6987 --- /dev/null +++ b/polly/test/Isl/Ast/rlr___%for.cond---%for.end10.jscop @@ -0,0 +1,32 @@ +{ + "context" : "[n] -> { : n >= -2147483648 and n <= 2147483647 }", + "name" : "for.cond => for.end10", + "statements" : [ + { + "accesses" : [ + { + "kind" : "read", + "relation" : "[n] -> { Stmt_S0[i0] -> MemRef_A[0] }" + }, + { + "kind" : "write", + "relation" : "[n] -> { Stmt_S0[i0] -> MemRef_A[0] }" + } + ], + "domain" : "[n] -> { Stmt_S0[i0] : i0 >= 0 and i0 <= -1 + 2n and n >= 1 }", + "name" : "Stmt_S0", + "schedule" : "[n] -> { Stmt_S0[i0] -> scattering[0, n - i0, 0] }" + }, + { + "accesses" : [ + { + "kind" : "write", + "relation" : "[n] -> { Stmt_S1[i0] -> MemRef_A[1 + i0] }" + } + ], + "domain" : "[n] -> { Stmt_S1[i0] : i0 >= 0 and i0 <= -1 + 2n and n >= 1 }", + "name" : "Stmt_S1", + "schedule" : "[n] -> { Stmt_S1[i0] -> scattering[1, n - i0, 0] }" + } + ] +} diff --git a/polly/test/Isl/Ast/rmalrs2___%for.cond---%for.end10.jscop b/polly/test/Isl/Ast/rmalrs2___%for.cond---%for.end10.jscop new file mode 100644 index 00000000000..c84f0a0c889 --- /dev/null +++ b/polly/test/Isl/Ast/rmalrs2___%for.cond---%for.end10.jscop @@ -0,0 +1,32 @@ +{ + "context" : "[n] -> { : n >= -2147483648 and n <= 2147483647 }", + "name" : "for.cond => for.end10", + "statements" : [ + { + "accesses" : [ + { + "kind" : "read", + "relation" : "[n] -> { Stmt_S0[i0] -> MemRef_A[0] }" + }, + { + "kind" : "write", + "relation" : "[n] -> { Stmt_S0[i0] -> MemRef_A[0] }" + } + ], + "domain" : "[n] -> { Stmt_S0[i0] : i0 >= 0 and i0 <= -1 + 2n and n >= 1 }", + "name" : "Stmt_S0", + "schedule" : "[n] -> { Stmt_S0[i0] -> scattering[0, -i0, 0]: i0 % 2 = 0; Stmt_S0[i0] -> scattering[2, i0, 0]: i0 % 2 = 1 }" + }, + { + "accesses" : [ + { + "kind" : "write", + "relation" : "[n] -> { Stmt_S1[i0] -> MemRef_A[1 + i0] }" + } + ], + "domain" : "[n] -> 
{ Stmt_S1[i0] : i0 >= 0 and i0 <= -1 + 2n and n >= 1 }", + "name" : "Stmt_S1", + "schedule" : "[n] -> { Stmt_S1[i0] -> scattering[1, i0, 0] }" + } + ] +} diff --git a/polly/test/Isl/Ast/rmalrs___%for.cond---%for.end10.jscop b/polly/test/Isl/Ast/rmalrs___%for.cond---%for.end10.jscop new file mode 100644 index 00000000000..9740b5758ea --- /dev/null +++ b/polly/test/Isl/Ast/rmalrs___%for.cond---%for.end10.jscop @@ -0,0 +1,32 @@ +{ + "context" : "[n] -> { : n >= -2147483648 and n <= 2147483647 }", + "name" : "for.cond => for.end10", + "statements" : [ + { + "accesses" : [ + { + "kind" : "read", + "relation" : "[n] -> { Stmt_S0[i0] -> MemRef_A[0] }" + }, + { + "kind" : "write", + "relation" : "[n] -> { Stmt_S0[i0] -> MemRef_A[0] }" + } + ], + "domain" : "[n] -> { Stmt_S0[i0] : i0 >= 0 and i0 <= -1 + 2n and n >= 1 }", + "name" : "Stmt_S0", + "schedule" : "[n] -> { Stmt_S0[i0] -> scattering[0, 2 * n - i0, 0]: i0 % 2 = 0; Stmt_S0[i0] -> scattering[2, 2 * n - i0, 0]: i0 % 2 = 1 }" + }, + { + "accesses" : [ + { + "kind" : "write", + "relation" : "[n] -> { Stmt_S1[i0] -> MemRef_A[1 + i0] }" + } + ], + "domain" : "[n] -> { Stmt_S1[i0] : i0 >= 0 and i0 <= -1 + 2n and n >= 1 }", + "name" : "Stmt_S1", + "schedule" : "[n] -> { Stmt_S1[i0] -> scattering[1, i0, 0] }" + } + ] +} diff --git a/polly/test/Isl/Ast/rmd2___%for.cond---%for.end12.jscop b/polly/test/Isl/Ast/rmd2___%for.cond---%for.end12.jscop new file mode 100644 index 00000000000..bca97934e20 --- /dev/null +++ b/polly/test/Isl/Ast/rmd2___%for.cond---%for.end12.jscop @@ -0,0 +1,21 @@ +{ + "context" : "{ : }", + "name" : "for.cond => for.end12", + "statements" : [ + { + "accesses" : [ + { + "kind" : "read", + "relation" : "{ Stmt_for_body6[i0, i1, i2] -> MemRef_A[i0] }" + }, + { + "kind" : "write", + "relation" : "{ Stmt_for_body6[i0, i1, i2] -> MemRef_A[i0] }" + } + ], + "domain" : "{ Stmt_for_body6[i0, i1, i2] : i0 >= 0 and i0 <= 2047 and i1 >= 0 and i1 <= 1023 and i2 >= 0 and i2 <= 511 }", + "name" : "Stmt_for_body6", + 
"schedule" : "{ Stmt_for_body6[i0, i1, i2] -> scattering[0, i1, 0, i0, 0, i2, 0] }" + } + ] +} diff --git a/polly/test/Isl/Ast/rmd3___%for.cond---%for.end12.jscop b/polly/test/Isl/Ast/rmd3___%for.cond---%for.end12.jscop new file mode 100644 index 00000000000..7cd7570a386 --- /dev/null +++ b/polly/test/Isl/Ast/rmd3___%for.cond---%for.end12.jscop @@ -0,0 +1,21 @@ +{ + "context" : "{ : }", + "name" : "for.cond => for.end12", + "statements" : [ + { + "accesses" : [ + { + "kind" : "read", + "relation" : "{ Stmt_for_body6[i0, i1, i2] -> MemRef_A[i0] }" + }, + { + "kind" : "write", + "relation" : "{ Stmt_for_body6[i0, i1, i2] -> MemRef_A[i0] }" + } + ], + "domain" : "{ Stmt_for_body6[i0, i1, i2] : i0 >= 0 and i0 <= 2047 and i1 >= 0 and i1 <= 1023 and i2 >= 0 and i2 <= 511 }", + "name" : "Stmt_for_body6", + "schedule" : "{ Stmt_for_body6[i0, i1, i2] -> scattering[0, i2, 0, i1, 0, i0, 0] }" + } + ] +} diff --git a/polly/test/Isl/Ast/rmd4___%for.cond---%for.end12.jscop b/polly/test/Isl/Ast/rmd4___%for.cond---%for.end12.jscop new file mode 100644 index 00000000000..b29c322730b --- /dev/null +++ b/polly/test/Isl/Ast/rmd4___%for.cond---%for.end12.jscop @@ -0,0 +1,21 @@ +{ + "context" : "{ : }", + "name" : "for.cond => for.end12", + "statements" : [ + { + "accesses" : [ + { + "kind" : "read", + "relation" : "{ Stmt_for_body6[i0, i1, i2] -> MemRef_A[i0] }" + }, + { + "kind" : "write", + "relation" : "{ Stmt_for_body6[i0, i1, i2] -> MemRef_A[i0] }" + } + ], + "domain" : "{ Stmt_for_body6[i0, i1, i2] : i0 >= 0 and i0 <= 2047 and i1 >= 0 and i1 <= 1023 and i2 >= 0 and i2 <= 511 }", + "name" : "Stmt_for_body6", + "schedule" : "{ Stmt_for_body6[i0, i1, i2] -> scattering[0, i2, 0, i0, 0, i1, 0] }" + } + ] +} diff --git a/polly/test/Isl/Ast/rmd___%for.cond---%for.end12.jscop b/polly/test/Isl/Ast/rmd___%for.cond---%for.end12.jscop new file mode 100644 index 00000000000..be7cdc2726a --- /dev/null +++ b/polly/test/Isl/Ast/rmd___%for.cond---%for.end12.jscop @@ -0,0 +1,21 @@ +{ + "context" : 
"{ : }", + "name" : "for.cond => for.end12", + "statements" : [ + { + "accesses" : [ + { + "kind" : "read", + "relation" : "{ Stmt_for_body6[i0, i1, i2] -> MemRef_A[i0] }" + }, + { + "kind" : "write", + "relation" : "{ Stmt_for_body6[i0, i1, i2] -> MemRef_A[i0] }" + } + ], + "domain" : "{ Stmt_for_body6[i0, i1, i2] : i0 >= 0 and i0 <= 2047 and i1 >= 0 and i1 <= 1023 and i2 >= 0 and i2 <= 511 }", + "name" : "Stmt_for_body6", + "schedule" : "{ Stmt_for_body6[i0, i1, i2] -> scattering[0, i0, 0, i1, 0, i2, 0] }" + } + ] +} diff --git a/polly/test/Isl/Ast/rms___%for.cond---%for.end10.jscop b/polly/test/Isl/Ast/rms___%for.cond---%for.end10.jscop new file mode 100644 index 00000000000..15a6930b81c --- /dev/null +++ b/polly/test/Isl/Ast/rms___%for.cond---%for.end10.jscop @@ -0,0 +1,32 @@ +{ + "context" : "[n] -> { : n >= -2147483648 and n <= 2147483647 }", + "name" : "for.cond => for.end10", + "statements" : [ + { + "accesses" : [ + { + "kind" : "read", + "relation" : "[n] -> { Stmt_S0[i0] -> MemRef_A[0] }" + }, + { + "kind" : "write", + "relation" : "[n] -> { Stmt_S0[i0] -> MemRef_A[0] }" + } + ], + "domain" : "[n] -> { Stmt_S0[i0] : i0 >= 0 and i0 <= -1 + 2n and n >= 1 }", + "name" : "Stmt_S0", + "schedule" : "[n] -> { Stmt_S0[i0] -> scattering[0, i0, 0]: i0 % 2 = 0; Stmt_S0[i0] -> scattering[2, i0, 0]: i0 % 2 = 1 }" + }, + { + "accesses" : [ + { + "kind" : "write", + "relation" : "[n] -> { Stmt_S1[i0] -> MemRef_A[1 + i0] }" + } + ], + "domain" : "[n] -> { Stmt_S1[i0] : i0 >= 0 and i0 <= -1 + 2n and n >= 1 }", + "name" : "Stmt_S1", + "schedule" : "[n] -> { Stmt_S1[i0] -> scattering[1, i0, 0] }" + } + ] +} diff --git a/polly/test/Isl/Ast/rmsmd2___%for.cond---%for.end6.jscop b/polly/test/Isl/Ast/rmsmd2___%for.cond---%for.end6.jscop new file mode 100644 index 00000000000..38cb44bce1f --- /dev/null +++ b/polly/test/Isl/Ast/rmsmd2___%for.cond---%for.end6.jscop @@ -0,0 +1,21 @@ +{ + "context" : "[n] -> { : n >= -2147483648 and n <= 2147483647 }", + "name" : "for.cond => 
for.end6", + "statements" : [ + { + "accesses" : [ + { + "kind" : "read", + "relation" : "[n] -> { Stmt_for_body3[i0, i1] -> MemRef_A[i0] }" + }, + { + "kind" : "write", + "relation" : "[n] -> { Stmt_for_body3[i0, i1] -> MemRef_A[i0] }" + } + ], + "domain" : "[n] -> { Stmt_for_body3[i0, i1] : i0 >= 0 and i0 <= -1 + 2n and n >= 1 and i1 >= 0 and i1 <= 1023 }", + "name" : "Stmt_for_body3", + "schedule" : "[n] -> { Stmt_for_body3[i0, i1] -> scattering[0, i0, 0, i1, 0]: i0 % 2 = 0; Stmt_for_body3[i0, i1] -> scattering[0, i0, 1, i1, 0]: i0 % 2 = 1 }" + } + ] +} diff --git a/polly/test/Isl/Ast/rmsmd3___%for.cond---%for.end6.jscop b/polly/test/Isl/Ast/rmsmd3___%for.cond---%for.end6.jscop new file mode 100644 index 00000000000..44d71a3e29e --- /dev/null +++ b/polly/test/Isl/Ast/rmsmd3___%for.cond---%for.end6.jscop @@ -0,0 +1,21 @@ +{ + "context" : "[n] -> { : n >= -2147483648 and n <= 2147483647 }", + "name" : "for.cond => for.end6", + "statements" : [ + { + "accesses" : [ + { + "kind" : "read", + "relation" : "[n] -> { Stmt_for_body3[i0, i1] -> MemRef_A[i0] }" + }, + { + "kind" : "write", + "relation" : "[n] -> { Stmt_for_body3[i0, i1] -> MemRef_A[i0] }" + } + ], + "domain" : "[n] -> { Stmt_for_body3[i0, i1] : i0 >= 0 and i0 <= -1 + 2n and n >= 1 and i1 >= 0 and i1 <= 1023 }", + "name" : "Stmt_for_body3", + "schedule" : "[n] -> { Stmt_for_body3[i0, i1] -> scattering[0, i0, 0, i1, 0]: i1 % 2 = 0; Stmt_for_body3[i0, i1] -> scattering[0, i0, 0, i1, 1]: i1 % 2 = 1 }" + } + ] +} diff --git a/polly/test/Isl/Ast/rmsmd4___%for.cond---%for.end6.jscop b/polly/test/Isl/Ast/rmsmd4___%for.cond---%for.end6.jscop new file mode 100644 index 00000000000..9bfd4aa944c --- /dev/null +++ b/polly/test/Isl/Ast/rmsmd4___%for.cond---%for.end6.jscop @@ -0,0 +1,21 @@ +{ + "context" : "[n] -> { : n >= -2147483648 and n <= 2147483647 }", + "name" : "for.cond => for.end6", + "statements" : [ + { + "accesses" : [ + { + "kind" : "read", + "relation" : "[n] -> { Stmt_for_body3[i0, i1] -> MemRef_A[i0] }" 
+ }, + { + "kind" : "write", + "relation" : "[n] -> { Stmt_for_body3[i0, i1] -> MemRef_A[i0] }" + } + ], + "domain" : "[n] -> { Stmt_for_body3[i0, i1] : i0 >= 0 and i0 <= -1 + 2n and n >= 1 and i1 >= 0 and i1 <= 1023 }", + "name" : "Stmt_for_body3", + "schedule" : "[n] -> { Stmt_for_body3[i0, i1] -> scattering[0, i0, 0, -i1, 0]: i1 % 2 = 0; Stmt_for_body3[i0, i1] -> scattering[0, i0, 0, i1, 1]: i1 % 2 = 1 }" + } + ] +} diff --git a/polly/test/Isl/Ast/rmsmd5___%for.cond---%for.end6.jscop b/polly/test/Isl/Ast/rmsmd5___%for.cond---%for.end6.jscop new file mode 100644 index 00000000000..ee1c3dd39ef --- /dev/null +++ b/polly/test/Isl/Ast/rmsmd5___%for.cond---%for.end6.jscop @@ -0,0 +1,21 @@ +{ + "context" : "[n] -> { : n >= -2147483648 and n <= 2147483647 }", + "name" : "for.cond => for.end6", + "statements" : [ + { + "accesses" : [ + { + "kind" : "read", + "relation" : "[n] -> { Stmt_for_body3[i0, i1] -> MemRef_A[i0] }" + }, + { + "kind" : "write", + "relation" : "[n] -> { Stmt_for_body3[i0, i1] -> MemRef_A[i0] }" + } + ], + "domain" : "[n] -> { Stmt_for_body3[i0, i1] : i0 >= 0 and i0 <= -1 + 2n and n >= 1 and i1 >= 0 and i1 <= 1023 }", + "name" : "Stmt_for_body3", + "schedule" : "[n] -> { Stmt_for_body3[i0, i1] -> scattering[0, i1, 0, i0, 0]: i1 % 2 = 0; Stmt_for_body3[i0, i1] -> scattering[0, i1, 1, -i0, 0]: i1 % 2 = 1 }" + } + ] +} diff --git a/polly/test/Isl/Ast/rmsmd___%for.cond---%for.end6.jscop b/polly/test/Isl/Ast/rmsmd___%for.cond---%for.end6.jscop new file mode 100644 index 00000000000..8a8678aa447 --- /dev/null +++ b/polly/test/Isl/Ast/rmsmd___%for.cond---%for.end6.jscop @@ -0,0 +1,21 @@ +{ + "context" : "[n] -> { : n >= -2147483648 and n <= 2147483647 }", + "name" : "for.cond => for.end6", + "statements" : [ + { + "accesses" : [ + { + "kind" : "read", + "relation" : "[n] -> { Stmt_for_body3[i0, i1] -> MemRef_A[i0] }" + }, + { + "kind" : "write", + "relation" : "[n] -> { Stmt_for_body3[i0, i1] -> MemRef_A[i0] }" + } + ], + "domain" : "[n] -> { 
Stmt_for_body3[i0, i1] : i0 >= 0 and i0 <= -1 + 2n and n >= 1 and i1 >= 0 and i1 <= 1023 }", + "name" : "Stmt_for_body3", + "schedule" : "[n] -> { Stmt_for_body3[i0, i1] -> scattering[0, i0, 0, i1, 0]: i0 % 2 = 0; Stmt_for_body3[i0, i1] -> scattering[1, i0, 0, i1, 0]: i0 % 2 = 1 }" + } + ] +} diff --git a/polly/test/Isl/CodeGen/reduction_simple_binary.ll b/polly/test/Isl/CodeGen/reduction_simple_binary.ll new file mode 100644 index 00000000000..e9bc1f19eb6 --- /dev/null +++ b/polly/test/Isl/CodeGen/reduction_simple_binary.ll @@ -0,0 +1,38 @@ +; RUN: opt %loadPolly -polly-ast -polly-ast-detect-parallel -analyze < %s | FileCheck %s +; +; CHECK: pragma simd reduction +; CHECK: pragma omp parallel for reduction +; +; int prod; +; void f() { +; for (int i = 0; i < 100; i++) +; prod *= i; +; } +; +target datalayout = "e-m:e-p:32:32-i64:64-v128:64:128-n32-S64" + +@prod = common global i32 0, align 4 + +define void @f() { +entry: + br label %for.cond + +for.cond: ; preds = %for.inc, %entry + %i1.0 = phi i32 [ 0, %entry ], [ %inc, %for.inc ] + %exitcond = icmp ne i32 %i1.0, 100 + br i1 %exitcond, label %for.body, label %for.end + +for.body: ; preds = %for.cond + %add2 = add nsw i32 %i1.0, 3 + %tmp1 = load i32* @prod, align 4 + %mul3 = mul nsw i32 %tmp1, %add2 + store i32 %mul3, i32* @prod, align 4 + br label %for.inc + +for.inc: ; preds = %for.body + %inc = add nsw i32 %i1.0, 1 + br label %for.cond + +for.end: ; preds = %for.cond + ret void +} |