diff options
36 files changed, 1528 insertions, 36 deletions
diff --git a/polly/include/polly/CodeGen/IslAst.h b/polly/include/polly/CodeGen/IslAst.h index 1d93298a94c..495b7eb88eb 100644 --- a/polly/include/polly/CodeGen/IslAst.h +++ b/polly/include/polly/CodeGen/IslAst.h @@ -49,6 +49,9 @@ struct IslAstUser { // The node is the innermost parallel loop. int IsInnermostParallel; + + // The node is only parallel because of reductions + bool IsReductionParallel; }; class IslAstInfo : public ScopPass { @@ -87,7 +90,7 @@ static inline bool isInnermostParallel(__isl_keep isl_ast_node *Node) { bool Res = false; if (Info) - Res = Info->IsInnermostParallel; + Res = Info->IsInnermostParallel && !Info->IsReductionParallel; isl_id_free(Id); return Res; } @@ -101,7 +104,7 @@ static inline bool isOutermostParallel(__isl_keep isl_ast_node *Node) { bool Res = false; if (Info) - Res = Info->IsOutermostParallel; + Res = Info->IsOutermostParallel && !Info->IsReductionParallel; isl_id_free(Id); return Res; } diff --git a/polly/include/polly/Dependences.h b/polly/include/polly/Dependences.h index 5b09ab969bf..b305660a8a5 100755 --- a/polly/include/polly/Dependences.h +++ b/polly/include/polly/Dependences.h @@ -47,23 +47,30 @@ public: /// @brief The type of the dependences. /// - /// Reduction dependences are seperated because they can be ignored during - /// the scheduling. This is the case since the order in which the reduction - /// statements are executed does not matter. However, if they are executed - /// in parallel we need to take additional measures (e.g., privatization) - /// to ensure a correct result. + /// Reduction dependences are separated from RAW/WAW/WAR dependences because + /// we can ignore them during the scheduling. This is the case since the order + /// in which the reduction statements are executed does not matter. However, + /// if they are executed in parallel we need to take additional measures + /// (e.g., privatization) to ensure a correct result. 
The (reverse) transitive + /// closure of the reduction dependences is used to check for parallel + /// executed reduction statements during code generation. These dependences + /// connect all instances of a reduction with each other, they are therefore + /// cyclic and possibly "reversed". enum Type { // Write after read - TYPE_WAR = 0x1, + TYPE_WAR = 1 << 0, // Read after write - TYPE_RAW = 0x2, + TYPE_RAW = 1 << 1, // Write after write - TYPE_WAW = 0x4, + TYPE_WAW = 1 << 2, // Reduction dependences - TYPE_RED = 0x8, + TYPE_RED = 1 << 3, + + // Transitive closure of the reduction dependences (& the reverse) + TYPE_TC_RED = 1 << 4, }; typedef std::map<ScopStmt *, isl_map *> StatementToIslMapTy; @@ -114,6 +121,9 @@ private: /// @brief The map of reduction dependences isl_union_map *RED = nullptr; + /// @brief The (reverse) transitive closure of reduction dependences + isl_union_map *TC_RED = nullptr; + /// @brief Collect information about the SCoP. void collectInfo(Scop &S, isl_union_map **Read, isl_union_map **Write, isl_union_map **MayWrite, isl_union_map **AccessSchedule, diff --git a/polly/lib/Analysis/Dependences.cpp b/polly/lib/Analysis/Dependences.cpp index c4e598141ff..2a6b16d0ae7 100644 --- a/polly/lib/Analysis/Dependences.cpp +++ b/polly/lib/Analysis/Dependences.cpp @@ -124,6 +124,15 @@ void Dependences::collectInfo(Scop &S, isl_union_map **Read, } } +/// @brief Fix all dimensions of @p Zero to 0 and add it to @p user +static int fixSetToZero(__isl_take isl_set *Zero, void *user) { + isl_union_set **User = (isl_union_set **)user; + for (unsigned i = 0; i < isl_set_dim(Zero, isl_dim_set); i++) + Zero = isl_set_fix_si(Zero, isl_dim_set, i, 0); + *User = isl_union_set_add_set(*User, Zero); + return 0; +} + /// @brief Compute the privatization dependences for a given dependency @p Map /// /// Privatization dependences are widened original dependences which originate /// S1[i0] 
-> S2[] : i0 >= 0 and i0 <= 1023} /// RED: /// { S1[i0] -> S1[1 + i0] : i0 >= 0 and i0 <= 1022 } +/// +/// Note: This function also computes the (reverse) transitive closure of the +/// reduction dependences. void Dependences::addPrivatizationDependences() { - isl_union_map *PrivRAW, *PrivWAW, *PrivWAR, *TransClosure; - - // The transitive closure might be over approximated but we only use it to - // compute the privatization dependences. Thus, overapproximation will lead - // "only" to more conservative privatization dependences. - // FIXME: Take precautions to ensure only forward dependences are created. - TransClosure = isl_union_map_transitive_closure(isl_union_map_copy(RED), 0); + isl_union_map *PrivRAW, *PrivWAW, *PrivWAR; + + // The transitive closure might be over approximated, thus could lead to + // dependency cycles in the privatization dependences. To make sure this + // will not happen we remove all negative dependences after we computed + // the transitive closure. + TC_RED = isl_union_map_transitive_closure(isl_union_map_copy(RED), 0); + + // FIXME: Apply the current schedule instead of assuming the identity schedule + // here. The current approach is only valid as long as we compute the + // dependences only with the initial (identity schedule). Any other + // schedule could change "the direction of the backward dependences" we + // want to eliminate here. 
+ isl_union_set *UDeltas = isl_union_map_deltas(isl_union_map_copy(TC_RED)); + isl_union_set *Universe = isl_union_set_universe(isl_union_set_copy(UDeltas)); + isl_union_set *Zero = isl_union_set_empty(isl_union_set_get_space(Universe)); + isl_union_set_foreach_set(Universe, fixSetToZero, &Zero); + isl_union_map *NonPositive = isl_union_set_lex_le_union_set(UDeltas, Zero); + + TC_RED = isl_union_map_subtract(TC_RED, NonPositive); + + TC_RED = isl_union_map_union( + TC_RED, isl_union_map_reverse(isl_union_map_copy(TC_RED))); + TC_RED = isl_union_map_coalesce(TC_RED); isl_union_map **Maps[] = {&RAW, &WAW, &WAR}; isl_union_map **PrivMaps[] = {&PrivRAW, &PrivWAW, &PrivWAR}; @@ -179,15 +208,15 @@ void Dependences::addPrivatizationDependences() { isl_union_map **Map = Maps[u], **PrivMap = PrivMaps[u]; *PrivMap = isl_union_map_apply_range(isl_union_map_copy(*Map), - isl_union_map_copy(TransClosure)); + isl_union_map_copy(TC_RED)); *PrivMap = isl_union_map_union( - *PrivMap, isl_union_map_apply_range(isl_union_map_copy(TransClosure), + *PrivMap, isl_union_map_apply_range(isl_union_map_copy(TC_RED), isl_union_map_copy(*Map))); *Map = isl_union_map_union(*Map, *PrivMap); } - isl_union_map_free(TransClosure); + isl_union_set_free(Universe); } void Dependences::calculateDependences(Scop &S) { @@ -330,6 +359,7 @@ void Dependences::calculateDependences(Scop &S) { WAW = isl_union_map_zip(WAW); WAR = isl_union_map_zip(WAR); RED = isl_union_map_zip(RED); + TC_RED = isl_union_map_zip(TC_RED); DEBUG(dbgs() << "Zipped Dependences:\n"; printScop(dbgs()); dbgs() << "\n"); @@ -337,6 +367,7 @@ void Dependences::calculateDependences(Scop &S) { WAW = isl_union_set_unwrap(isl_union_map_domain(WAW)); WAR = isl_union_set_unwrap(isl_union_map_domain(WAR)); RED = isl_union_set_unwrap(isl_union_map_domain(RED)); + TC_RED = isl_union_set_unwrap(isl_union_map_domain(TC_RED)); DEBUG(dbgs() << "Unwrapped Dependences:\n"; printScop(dbgs()); dbgs() << "\n"); @@ -349,6 +380,7 @@ void 
Dependences::calculateDependences(Scop &S) { WAW = isl_union_map_coalesce(WAW); WAR = isl_union_map_coalesce(WAR); RED = isl_union_map_coalesce(RED); + TC_RED = isl_union_map_coalesce(TC_RED); DEBUG(printScop(dbgs())); } @@ -495,6 +527,8 @@ void Dependences::printScop(raw_ostream &OS) const { printDependencyMap(OS, WAW); OS << "\tReduction dependences:\n\t\t"; printDependencyMap(OS, RED); + OS << "\tTransitive closure of reduction dependences:\n\t\t"; + printDependencyMap(OS, TC_RED); } void Dependences::releaseMemory() { @@ -502,8 +536,9 @@ void Dependences::releaseMemory() { isl_union_map_free(WAR); isl_union_map_free(WAW); isl_union_map_free(RED); + isl_union_map_free(TC_RED); - RED = RAW = WAR = WAW = nullptr; + RED = RAW = WAR = WAW = TC_RED = nullptr; } isl_union_map *Dependences::getDependences(int Kinds) { @@ -523,6 +558,9 @@ isl_union_map *Dependences::getDependences(int Kinds) { if (Kinds & TYPE_RED) Deps = isl_union_map_union(Deps, isl_union_map_copy(RED)); + if (Kinds & TYPE_TC_RED) + Deps = isl_union_map_union(Deps, isl_union_map_copy(TC_RED)); + Deps = isl_union_map_coalesce(Deps); Deps = isl_union_map_detect_equalities(Deps); return Deps; diff --git a/polly/lib/CodeGen/IslAst.cpp b/polly/lib/CodeGen/IslAst.cpp index da4ef7b7056..52c6bc71b5d 100644 --- a/polly/lib/CodeGen/IslAst.cpp +++ b/polly/lib/CodeGen/IslAst.cpp @@ -91,11 +91,15 @@ printParallelFor(__isl_keep isl_ast_node *Node, __isl_take isl_printer *Printer, if (Info->IsInnermostParallel) { Printer = isl_printer_start_line(Printer); Printer = isl_printer_print_str(Printer, "#pragma simd"); + if (Info->IsReductionParallel) + Printer = isl_printer_print_str(Printer, " reduction"); Printer = isl_printer_end_line(Printer); } if (Info->IsOutermostParallel) { Printer = isl_printer_start_line(Printer); Printer = isl_printer_print_str(Printer, "#pragma omp parallel for"); + if (Info->IsReductionParallel) + Printer = isl_printer_print_str(Printer, " reduction"); Printer = isl_printer_end_line(Printer); 
} } @@ -124,6 +128,7 @@ static struct IslAstUser *allocateIslAstUser() { NodeInfo->Context = 0; NodeInfo->IsOutermostParallel = 0; NodeInfo->IsInnermostParallel = 0; + NodeInfo->IsReductionParallel = false; return NodeInfo; } @@ -148,25 +153,17 @@ static void freeIslAstUser(void *Ptr) { // dimension if it is a subset of a map with equal values for the current // dimension. static bool astScheduleDimIsParallel(__isl_keep isl_ast_build *Build, - Dependences *D) { - isl_union_map *Schedule, *Deps; + __isl_take isl_union_map *Deps) { + isl_union_map *Schedule; isl_map *ScheduleDeps, *Test; isl_space *ScheduleSpace; unsigned Dimension, IsParallel; - if (!D->hasValidDependences()) { - return false; - } - Schedule = isl_ast_build_get_schedule(Build); ScheduleSpace = isl_ast_build_get_schedule_space(Build); Dimension = isl_space_dim(ScheduleSpace, isl_dim_out) - 1; - // FIXME: We can remove ignore reduction dependences in case we privatize the - // memory locations the reduction statements reduce into. - Deps = D->getDependences(Dependences::TYPE_RAW | Dependences::TYPE_WAW | - Dependences::TYPE_WAR | Dependences::TYPE_RED); Deps = isl_union_map_apply_range(Deps, isl_union_map_copy(Schedule)); Deps = isl_union_map_apply_domain(Deps, Schedule); @@ -192,6 +189,35 @@ static bool astScheduleDimIsParallel(__isl_keep isl_ast_build *Build, return IsParallel; } +/// @brief Check if the current scheduling dimension is parallel +/// +/// In case the dimension is parallel we also check if any reduction +/// dependences are broken when we exploit this parallelism. If so, +/// @p IsReductionParallel will be set to true. The reduction dependences we use +/// to check are actually the union of the transitive closure of the initial +/// reduction dependences together with their reversal. 
Even though these +/// dependences connect all iterations with each other (thus they are cyclic) +/// we can perform the parallelism check as we are only interested in a zero +/// (or non-zero) dependence distance on the dimension in question. +static bool astScheduleDimIsParallel(__isl_keep isl_ast_build *Build, + Dependences *D, + bool &IsReductionParallel) { + if (!D->hasValidDependences()) + return false; + + isl_union_map *Deps = D->getDependences( + Dependences::TYPE_RAW | Dependences::TYPE_WAW | Dependences::TYPE_WAR); + if (!astScheduleDimIsParallel(Build, Deps)) + return false; + + isl_union_map *RedDeps = + D->getDependences(Dependences::TYPE_TC_RED); + if (!astScheduleDimIsParallel(Build, RedDeps)) + IsReductionParallel = true; + + return true; +} + // Mark a for node openmp parallel, if it is the outermost parallel for node. static void markOpenmpParallel(__isl_keep isl_ast_build *Build, struct AstBuildUserInfo *BuildInfo, @@ -199,7 +225,8 @@ static void markOpenmpParallel(__isl_keep isl_ast_build *Build, if (BuildInfo->InParallelFor) return; - if (astScheduleDimIsParallel(Build, BuildInfo->Deps)) { + if (astScheduleDimIsParallel(Build, BuildInfo->Deps, + NodeInfo->IsReductionParallel)) { BuildInfo->InParallelFor = 1; NodeInfo->IsOutermostParallel = 1; } @@ -284,7 +311,8 @@ astBuildAfterFor(__isl_take isl_ast_node *Node, __isl_keep isl_ast_build *Build, if (Info->IsOutermostParallel) BuildInfo->InParallelFor = 0; if (!containsLoops(isl_ast_node_for_get_body(Node))) - if (astScheduleDimIsParallel(Build, BuildInfo->Deps)) + if (astScheduleDimIsParallel(Build, BuildInfo->Deps, + Info->IsReductionParallel)) Info->IsInnermostParallel = 1; if (!Info->Context) Info->Context = isl_ast_build_copy(Build); diff --git a/polly/test/Dependences/reduction_dependences_equal_non_reduction_dependences.ll b/polly/test/Dependences/reduction_dependences_equal_non_reduction_dependences.ll new file mode 100644 index 00000000000..ef798805c9d --- /dev/null +++ 
b/polly/test/Dependences/reduction_dependences_equal_non_reduction_dependences.ll @@ -0,0 +1,62 @@ +; RUN: opt %loadPolly -basicaa -polly-dependences -analyze < %s | FileCheck %s +; +; This loopnest contains a reduction which imposes the same dependences as the +; accesses to the array A. We need to ensure we keep the dependences of A. +; +; CHECK: RAW dependences: +; CHECK: { Stmt_for_body[i0] -> Stmt_for_body[1 + i0] : i0 >= 0 and i0 <= 1022 } +; CHECK: WAR dependences: +; CHECK: { } +; CHECK: WAW dependences: +; CHECK: { Stmt_for_body[i0] -> Stmt_for_body[1 + i0] : i0 >= 0 and i0 <= 1022 } +; CHECK: Reduction dependences: +; CHECK: { Stmt_for_body[i0] -> Stmt_for_body[1 + i0] : i0 <= 1022 and i0 >= 0 } +; +; +; void AandSum(int *restrict sum, int *restrict A) { +; for (int i = 0; i < 1024; i++) { +; A[i] = A[i] + A[i - 1]; +; A[i - 1] = A[i] + A[i - 2]; +; *sum += i; +; } +; } +; +target datalayout = "e-m:e-p:32:32-i64:64-v128:64:128-n32-S64" + +define void @AandSum(i32* noalias %sum, i32* noalias %A) { +entry: + br label %for.cond + +for.cond: ; preds = %for.inc, %entry + %i.0 = phi i32 [ 0, %entry ], [ %inc, %for.inc ] + %exitcond = icmp ne i32 %i.0, 1024 + br i1 %exitcond, label %for.body, label %for.end + +for.body: ; preds = %for.cond + %arrayidx = getelementptr inbounds i32* %A, i32 %i.0 + %tmp = load i32* %arrayidx, align 4 + %sub = add nsw i32 %i.0, -1 + %arrayidx1 = getelementptr inbounds i32* %A, i32 %sub + %tmp1 = load i32* %arrayidx1, align 4 + %add = add nsw i32 %tmp, %tmp1 + %arrayidx2 = getelementptr inbounds i32* %A, i32 %i.0 + store i32 %add, i32* %arrayidx2, align 4 + %sub4 = add nsw i32 %i.0, -2 + %arrayidx5 = getelementptr inbounds i32* %A, i32 %sub4 + %tmp2 = load i32* %arrayidx5, align 4 + %add6 = add nsw i32 %add, %tmp2 + %sub7 = add nsw i32 %i.0, -1 + %arrayidx8 = getelementptr inbounds i32* %A, i32 %sub7 + store i32 %add6, i32* %arrayidx8, align 4 + %tmp3 = load i32* %sum, align 4 + %add9 = add nsw i32 %tmp3, %i.0 + store i32 %add9, i32* 
%sum, align 4 + br label %for.inc + +for.inc: ; preds = %for.body + %inc = add nsw i32 %i.0, 1 + br label %for.cond + +for.end: ; preds = %for.cond + ret void +} diff --git a/polly/test/Dependences/reduction_mixed_reduction_and_non_reduction_dependences.ll b/polly/test/Dependences/reduction_mixed_reduction_and_non_reduction_dependences.ll index 248d7b8c361..b114fe6fa72 100644 --- a/polly/test/Dependences/reduction_mixed_reduction_and_non_reduction_dependences.ll +++ b/polly/test/Dependences/reduction_mixed_reduction_and_non_reduction_dependences.ll @@ -9,7 +9,7 @@ ; CHECK-DAG: Stmt_for_body3[i0, i1] -> Stmt_for_body3[1 + i0, -1 + i1] : i0 <= 1022 and i0 >= 0 and i1 <= 511 and i1 >= 2 ; CHECK-DAG: Stmt_for_body3[i0, 2] -> Stmt_for_body3[2 + i0, 0] : i0 <= 1021 and i0 >= 0 ; CHECK: Reduction dependences: -; CHECK: { Stmt_for_body3[i0, 1] -> Stmt_for_body3[1 + i0, 0] : i0 >= 0 and i0 <= 1022 } +; CHECK: { Stmt_for_body3[i0, 1] -> Stmt_for_body3[1 + i0, 0] : i0 <= 1022 and i0 >= 0 } ; ; void f(int *sum) { ; for (int i = 0; i < 1024; i++) diff --git a/polly/test/Isl/Ast/OpenMP/nested_loop_both_parallel_parametric.ll b/polly/test/Isl/Ast/OpenMP/nested_loop_both_parallel_parametric.ll index 39b9108de1f..23f4c2418e0 100644 --- a/polly/test/Isl/Ast/OpenMP/nested_loop_both_parallel_parametric.ll +++ b/polly/test/Isl/Ast/OpenMP/nested_loop_both_parallel_parametric.ll @@ -53,7 +53,6 @@ ret: ; Note that we do not delinearize this access function because it is considered ; to already be affine: {{0,+,4}<%loop.i>,+,4096}<%loop.j>. 
-; ; CHECK: for (int c1 = 0; c1 < n; c1 += 1) ; CHECK: #pragma simd ; CHECK: #pragma omp parallel for diff --git a/polly/test/Isl/Ast/reduction_dependences_equal_non_reduction_dependences.ll b/polly/test/Isl/Ast/reduction_dependences_equal_non_reduction_dependences.ll new file mode 100644 index 00000000000..b6527fb05b3 --- /dev/null +++ b/polly/test/Isl/Ast/reduction_dependences_equal_non_reduction_dependences.ll @@ -0,0 +1,56 @@ +; RUN: opt %loadPolly -basicaa -polly-ast -polly-ast-detect-parallel -analyze < %s | FileCheck %s +; +; This loopnest contains a reduction which imposes the same dependences as the +; accesses to the array A. We need to ensure we do __not__ parallelize anything +; here. +; +; CHECK-NOT: pragma +; CHECK-NOT: reduction +; +; void AandSum(int *restrict sum, int *restrict A) { +; for (int i = 0; i < 1024; i++) { +; A[i] = A[i] + A[i - 1]; +; A[i - 1] = A[i] + A[i - 2]; +; *sum += i; +; } +; } +; +target datalayout = "e-m:e-p:32:32-i64:64-v128:64:128-n32-S64" + +define void @AandSum(i32* noalias %sum, i32* noalias %A) { +entry: + br label %for.cond + +for.cond: ; preds = %for.inc, %entry + %i.0 = phi i32 [ 0, %entry ], [ %inc, %for.inc ] + %exitcond = icmp ne i32 %i.0, 1024 + br i1 %exitcond, label %for.body, label %for.end + +for.body: ; preds = %for.cond + %arrayidx = getelementptr inbounds i32* %A, i32 %i.0 + %tmp = load i32* %arrayidx, align 4 + %sub = add nsw i32 %i.0, -1 + %arrayidx1 = getelementptr inbounds i32* %A, i32 %sub + %tmp1 = load i32* %arrayidx1, align 4 + %add = add nsw i32 %tmp, %tmp1 + %arrayidx2 = getelementptr inbounds i32* %A, i32 %i.0 + store i32 %add, i32* %arrayidx2, align 4 + %sub4 = add nsw i32 %i.0, -2 + %arrayidx5 = getelementptr inbounds i32* %A, i32 %sub4 + %tmp2 = load i32* %arrayidx5, align 4 + %add6 = add nsw i32 %add, %tmp2 + %sub7 = add nsw i32 %i.0, -1 + %arrayidx8 = getelementptr inbounds i32* %A, i32 %sub7 + store i32 %add6, i32* %arrayidx8, align 4 + %tmp3 = load i32* %sum, align 4 + %add9 = add nsw i32 
%tmp3, %i.0 + store i32 %add9, i32* %sum, align 4 + br label %for.inc + +for.inc: ; preds = %for.body + %inc = add nsw i32 %i.0, 1 + br label %for.cond + +for.end: ; preds = %for.cond + ret void +} diff --git a/polly/test/Isl/Ast/reduction_in_one_dimension.ll b/polly/test/Isl/Ast/reduction_in_one_dimension.ll new file mode 100644 index 00000000000..24bbd9f8d14 --- /dev/null +++ b/polly/test/Isl/Ast/reduction_in_one_dimension.ll @@ -0,0 +1,57 @@ +; RUN: opt %loadPolly -polly-ast -polly-ast-detect-parallel -analyze < %s | FileCheck %s +; +; Verify that we won't privatize anything in the outer dimension +; +; CHECK: #pragma omp parallel for +; CHECK: for (int c1 = 0; c1 < 2 * n; c1 += 1) +; CHECK: #pragma simd reduction +; CHECK: for (int c3 = 0; c3 <= 1023; c3 += 1) +; CHECK: Stmt_for_body3(c1, c3); +; +; void foo(int *A, long n) { +; for (long i = 0; i < 2 * n; i++) +; for (long j = 0; j < 1024; j++) +; A[i] += i; +; } +; +target datalayout = "e-m:e-p:32:32-i64:64-v128:64:128-n32-S64" + +define void @foo(i32* %A, i32 %n) { +entry: + br label %for.cond + +for.cond: ; preds = %for.inc4, %entry + %i.0 = phi i32 [ 0, %entry ], [ %inc5, %for.inc4 ] + %mul = shl nsw i32 %n, 1 + %cmp = icmp slt i32 %i.0, %mul + br i1 %cmp, label %for.body, label %for.end6 + +for.body: ; preds = %for.cond + br label %for.cond1 + +for.cond1: ; preds = %for.inc, %for.body + %j.0 = phi i32 [ 0, %for.body ], [ %inc, %for.inc ] + %exitcond = icmp ne i32 %j.0, 1024 + br i1 %exitcond, label %for.body3, label %for.end + +for.body3: ; preds = %for.cond1 + %arrayidx = getelementptr inbounds i32* %A, i32 %i.0 + %tmp = load i32* %arrayidx, align 4 + %add = add nsw i32 %tmp, %i.0 + store i32 %add, i32* %arrayidx, align 4 + br label %for.inc + +for.inc: ; preds = %for.body3 + %inc = add nsw i32 %j.0, 1 + br label %for.cond1 + +for.end: ; preds = %for.cond1 + br label %for.inc4 + +for.inc4: ; preds = %for.end + %inc5 = add nsw i32 %i.0, 1 + br label %for.cond + +for.end6: ; preds = %for.cond + ret void +} 
diff --git a/polly/test/Isl/Ast/reduction_loop_reversal.ll b/polly/test/Isl/Ast/reduction_loop_reversal.ll new file mode 100644 index 00000000000..25ee3d79634 --- /dev/null +++ b/polly/test/Isl/Ast/reduction_loop_reversal.ll @@ -0,0 +1,66 @@ +; RUN: opt %loadPolly -polly-import-jscop-dir=%S -polly-import-jscop -polly-ast -polly-ast-detect-parallel -analyze < %s | FileCheck %s +; +; CHECK-NOT: #pragma simd{{\s*$}} +; CHECK: #pragma simd reduction +; CHECK: Stmt_S0(n - c1) +; CHECK: #pragma simd{{\s*$}} +; CHECK: Stmt_S1(n - c1) +; +; void rlr(int *A, long n) { +; for (long i = 0; i < 2 * n; i++) +; S0: A[0] += i; +; for (long i = 0; i < 2 * n; i++) +; S1: A[i + 1] = 1; +; } +; +target datalayout = "e-m:e-p:32:32-i64:64-v128:64:128-n32-S64" + +define void @rlr(i32* %A, i32 %n) { +entry: + br label %for.cond + +for.cond: ; preds = %for.inc, %entry + %i.0 = phi i32 [ 0, %entry ], [ %inc, %for.inc ] + %mul = shl nsw i32 %n, 1 + %cmp = icmp slt i32 %i.0, %mul + br i1 %cmp, label %for.body, label %for.end + +for.body: ; preds = %for.cond + br label %S0 + +S0: ; preds = %for.body + %tmp = load i32* %A, align 4 + %add = add nsw i32 %tmp, %i.0 + store i32 %add, i32* %A, align 4 + br label %for.inc + +for.inc: ; preds = %S0 + %inc = add nsw i32 %i.0, 1 + br label %for.cond + +for.end: ; preds = %for.cond + br label %for.cond2 + +for.cond2: ; preds = %for.inc8, %for.end + %i1.0 = phi i32 [ 0, %for.end ], [ %inc9, %for.inc8 ] + %mul3 = shl nsw i32 %n, 1 + %cmp4 = icmp slt i32 %i1.0, %mul3 + br i1 %cmp4, label %for.body5, label %for.end10 + +for.body5: ; preds = %for.cond2 + br label %S1 + +S1: ; preds = %for.body5 + %add6 = add nsw i32 %i1.0, 1 + %arrayidx7 = getelementptr inbounds i32* %A, i32 %add6 + store i32 1, i32* %arrayidx7, align 4 + br label %for.inc8 + +for.inc8: ; preds = %S1 + %inc9 = add nsw i32 %i1.0, 1 + br label %for.cond2 + +for.end10: ; preds = %for.cond2 + ret void +} + diff --git a/polly/test/Isl/Ast/reduction_modulo_and_loop_reversal_schedule.ll 
b/polly/test/Isl/Ast/reduction_modulo_and_loop_reversal_schedule.ll new file mode 100644 index 00000000000..0285dbbec65 --- /dev/null +++ b/polly/test/Isl/Ast/reduction_modulo_and_loop_reversal_schedule.ll @@ -0,0 +1,69 @@ +; RUN: opt %loadPolly -polly-import-jscop-dir=%S -polly-import-jscop -polly-ast -polly-ast-detect-parallel -analyze < %s | FileCheck %s +; +; CHECK-NOT: #pragma simd{{\s*$}} +; CHECK: #pragma simd reduction +; CHECK: Stmt_S0(2 * n - c1) +; CHECK: #pragma simd{{\s*$}} +; CHECK: Stmt_S1 +; CHECK: #pragma simd reduction +; CHECK: Stmt_S0(2 * n - c1) +; CHECK-NOT: #pragma simd{{\s*$}} +; +; void rmalrs(int *A, long n) { +; for (long i = 0; i < 2 * n; i++) +; S0: A[0] += i; +; for (long i = 0; i < 2 * n; i++) +; S1: A[i + 1] = 1; +; } +; +target datalayout = "e-m:e-p:32:32-i64:64-v128:64:128-n32-S64" + +define void @rmalrs(i32* %A, i32 %n) { +entry: + br label %for.cond + +for.cond: ; preds = %for.inc, %entry + %i.0 = phi i32 [ 0, %entry ], [ %inc, %for.inc ] + %mul = shl nsw i32 %n, 1 + %cmp = icmp slt i32 %i.0, %mul + br i1 %cmp, label %for.body, label %for.end + +for.body: ; preds = %for.cond + br label %S0 + +S0: ; preds = %for.body + %tmp = load i32* %A, align 4 + %add = add nsw i32 %tmp, %i.0 + store i32 %add, i32* %A, align 4 + br label %for.inc + +for.inc: ; preds = %S0 + %inc = add nsw i32 %i.0, 1 + br label %for.cond + +for.end: ; preds = %for.cond + br label %for.cond2 + +for.cond2: ; preds = %for.inc8, %for.end + %i1.0 = phi i32 [ 0, %for.end ], [ %inc9, %for.inc8 ] + %mul3 = shl nsw i32 %n, 1 + %cmp4 = icmp slt i32 %i1.0, %mul3 + br i1 %cmp4, label %for.body5, label %for.end10 + +for.body5: ; preds = %for.cond2 + br label %S1 + +S1: ; preds = %for.body5 + %add6 = add nsw i32 %i1.0, 1 + %arrayidx7 = getelementptr inbounds i32* %A, i32 %add6 + store i32 1, i32* %arrayidx7, align 4 + br label %for.inc8 + +for.inc8: ; preds = %S1 + %inc9 = add nsw i32 %i1.0, 1 + br label %for.cond2 + +for.end10: ; preds = %for.cond2 + ret void +} + diff 
--git a/polly/test/Isl/Ast/reduction_modulo_and_loop_reversal_schedule_2.ll b/polly/test/Isl/Ast/reduction_modulo_and_loop_reversal_schedule_2.ll new file mode 100644 index 00000000000..3ac8a29dc71 --- /dev/null +++ b/polly/test/Isl/Ast/reduction_modulo_and_loop_reversal_schedule_2.ll @@ -0,0 +1,76 @@ +; RUN: opt %loadPolly -polly-import-jscop-dir=%S -polly-import-jscop -polly-ast -polly-ast-detect-parallel -analyze < %s | FileCheck %s +; +; CHECK: #pragma omp parallel for reduction +; CHECK: for (int c0 = 0; c0 <= 2; c0 += 1) { +; CHECK: if (c0 == 2) { +; CHECK: #pragma simd reduction +; CHECK: for (int c1 = 1; c1 < 2 * n; c1 += 2) +; CHECK: Stmt_S0(c1); +; CHECK: } else if (c0 == 1) { +; CHECK: #pragma simd +; CHECK: for (int c1 = 0; c1 < 2 * n; c1 += 1) +; CHECK: Stmt_S1(c1); +; CHECK: } else +; CHECK: #pragma simd reduction +; CHECK: for (int c1 = -2 * n + 2; c1 <= 0; c1 += 2) +; CHECK: Stmt_S0(-c1); +; CHECK: } +; +; void rmalrs2(int *A, long n) { +; for (long i = 0; i < 2 * n; i++) +; S0: A[0] += i; +; for (long i = 0; i < 2 * n; i++) +; S1: A[i + 1] = 1; +; } +; +target datalayout = "e-m:e-p:32:32-i64:64-v128:64:128-n32-S64" + +define void @rmalrs2(i32* %A, i32 %n) { +entry: + br label %for.cond + +for.cond: ; preds = %for.inc, %entry + %i.0 = phi i32 [ 0, %entry ], [ %inc, %for.inc ] + %mul = shl nsw i32 %n, 1 + %cmp = icmp slt i32 %i.0, %mul + br i1 %cmp, label %for.body, label %for.end + +for.body: ; preds = %for.cond + br label %S0 + +S0: ; preds = %for.body + %tmp = load i32* %A, align 4 + %add = add nsw i32 %tmp, %i.0 + store i32 %add, i32* %A, align 4 + br label %for.inc + +for.inc: ; preds = %S0 + %inc = add nsw i32 %i.0, 1 + br label %for.cond + +for.end: ; preds = %for.cond + br label %for.cond2 + +for.cond2: ; preds = %for.inc8, %for.end + %i1.0 = phi i32 [ 0, %for.end ], [ %inc9, %for.inc8 ] + %mul3 = shl nsw i32 %n, 1 + %cmp4 = icmp slt i32 %i1.0, %mul3 + br i1 %cmp4, label %for.body5, label %for.end10 + +for.body5: ; preds = %for.cond2 + br 
label %S1 + +S1: ; preds = %for.body5 + %add6 = add nsw i32 %i1.0, 1 + %arrayidx7 = getelementptr inbounds i32* %A, i32 %add6 + store i32 1, i32* %arrayidx7, align 4 + br label %for.inc8 + +for.inc8: ; preds = %S1 + %inc9 = add nsw i32 %i1.0, 1 + br label %for.cond2 + +for.end10: ; preds = %for.cond2 + ret void +} + diff --git a/polly/test/Isl/Ast/reduction_modulo_schedule.ll b/polly/test/Isl/Ast/reduction_modulo_schedule.ll new file mode 100644 index 00000000000..f4dfef63981 --- /dev/null +++ b/polly/test/Isl/Ast/reduction_modulo_schedule.ll @@ -0,0 +1,69 @@ +; RUN: opt %loadPolly -polly-import-jscop-dir=%S -polly-import-jscop -polly-ast -polly-ast-detect-parallel -analyze < %s | FileCheck %s +; +; CHECK-NOT: #pragma simd{{\s*$}} +; CHECK: #pragma simd reduction +; CHECK: Stmt_S0 +; CHECK: #pragma simd{{\s*$}} +; CHECK: Stmt_S1 +; CHECK: #pragma simd reduction +; CHECK: Stmt_S0 +; CHECK-NOT: #pragma simd{{\s*$}} +; +; void rms(int *A, long n) { +; for (long i = 0; i < 2 * n; i++) +; S0: A[0] += i; +; for (long i = 0; i < 2 * n; i++) +; S1: A[i + 1] = 1; +; } +; +target datalayout = "e-m:e-p:32:32-i64:64-v128:64:128-n32-S64" + +define void @rms(i32* %A, i32 %n) { +entry: + br label %for.cond + +for.cond: ; preds = %for.inc, %entry + %i.0 = phi i32 [ 0, %entry ], [ %inc, %for.inc ] + %mul = shl nsw i32 %n, 1 + %cmp = icmp slt i32 %i.0, %mul + br i1 %cmp, label %for.body, label %for.end + +for.body: ; preds = %for.cond + br label %S0 + +S0: ; preds = %for.body + %tmp = load i32* %A, align 4 + %add = add nsw i32 %tmp, %i.0 + store i32 %add, i32* %A, align 4 + br label %for.inc + +for.inc: ; preds = %S0 + %inc = add nsw i32 %i.0, 1 + br label %for.cond + +for.end: ; preds = %for.cond + br label %for.cond2 + +for.cond2: ; preds = %for.inc8, %for.end + %i1.0 = phi i32 [ 0, %for.end ], [ %inc9, %for.inc8 ] + %mul3 = shl nsw i32 %n, 1 + %cmp4 = icmp slt i32 %i1.0, %mul3 + br i1 %cmp4, label %for.body5, label %for.end10 + +for.body5: ; preds = %for.cond2 + br label %S1 + 
+S1: ; preds = %for.body5 + %add6 = add nsw i32 %i1.0, 1 + %arrayidx7 = getelementptr inbounds i32* %A, i32 %add6 + store i32 1, i32* %arrayidx7, align 4 + br label %for.inc8 + +for.inc8: ; preds = %S1 + %inc9 = add nsw i32 %i1.0, 1 + br label %for.cond2 + +for.end10: ; preds = %for.cond2 + ret void +} + diff --git a/polly/test/Isl/Ast/reduction_modulo_schedule_multiple_dimensions.ll b/polly/test/Isl/Ast/reduction_modulo_schedule_multiple_dimensions.ll new file mode 100644 index 00000000000..af7f79990c9 --- /dev/null +++ b/polly/test/Isl/Ast/reduction_modulo_schedule_multiple_dimensions.ll @@ -0,0 +1,63 @@ +; RUN: opt %loadPolly -polly-import-jscop-dir=%S -polly-import-jscop -polly-ast -polly-ast-detect-parallel -analyze < %s | FileCheck %s +; +; CHECK: #pragma omp parallel for +; CHECK: for (int c0 = 0; c0 <= 1; c0 += 1) { +; CHECK: if (c0 == 1) { +; CHECK: for (int c1 = 1; c1 < 2 * n; c1 += 2) +; CHECK: #pragma simd reduction +; CHECK: for (int c3 = 0; c3 <= 1023; c3 += 1) +; CHECK: Stmt_for_body3(c1, c3); +; CHECK: } else +; CHECK: for (int c1 = 0; c1 < 2 * n - 1; c1 += 2) +; CHECK: #pragma simd reduction +; CHECK: for (int c3 = 0; c3 <= 1023; c3 += 1) +; CHECK: Stmt_for_body3(c1, c3); +; CHECK: } +; +; void rmsmd(int *A, long n) { +; for (long i = 0; i < 2 * n; i++) +; for (long j = 0; j < 1024; j++) +; A[i] += i; +; } +; +target datalayout = "e-m:e-p:32:32-i64:64-v128:64:128-n32-S64" + +define void @rmsmd(i32* %A, i32 %n) { +entry: + br label %for.cond + +for.cond: ; preds = %for.inc4, %entry + %i.0 = phi i32 [ 0, %entry ], [ %inc5, %for.inc4 ] + %mul = shl nsw i32 %n, 1 + %cmp = icmp slt i32 %i.0, %mul + br i1 %cmp, label %for.body, label %for.end6 + +for.body: ; preds = %for.cond + br label %for.cond1 + +for.cond1: ; preds = %for.inc, %for.body + %j.0 = phi i32 [ 0, %for.body ], [ %inc, %for.inc ] + %exitcond = icmp ne i32 %j.0, 1024 + br i1 %exitcond, label %for.body3, label %for.end + +for.body3: ; preds = %for.cond1 + %arrayidx = getelementptr inbounds 
i32* %A, i32 %i.0 + %tmp = load i32* %arrayidx, align 4 + %add = add nsw i32 %tmp, %i.0 + store i32 %add, i32* %arrayidx, align 4 + br label %for.inc + +for.inc: ; preds = %for.body3 + %inc = add nsw i32 %j.0, 1 + br label %for.cond1 + +for.end: ; preds = %for.cond1 + br label %for.inc4 + +for.inc4: ; preds = %for.end + %inc5 = add nsw i32 %i.0, 1 + br label %for.cond + +for.end6: ; preds = %for.cond + ret void +} diff --git a/polly/test/Isl/Ast/reduction_modulo_schedule_multiple_dimensions_2.ll b/polly/test/Isl/Ast/reduction_modulo_schedule_multiple_dimensions_2.ll new file mode 100644 index 00000000000..34677d27501 --- /dev/null +++ b/polly/test/Isl/Ast/reduction_modulo_schedule_multiple_dimensions_2.ll @@ -0,0 +1,64 @@ +; RUN: opt %loadPolly -polly-import-jscop-dir=%S -polly-import-jscop -polly-ast -polly-ast-detect-parallel -analyze < %s | FileCheck %s +; +; Verify that the outer dimension doesnt't carry reduction dependences +; +; CHECK-NOT:#pragma omp parallel for reduction +; CHECK: #pragma omp parallel for +; CHECK: for (int c1 = 0; c1 < 2 * n; c1 += 1) { +; CHECK: if (c1 % 2 == 0) { +; CHECK: #pragma simd reduction +; CHECK: for (int c3 = 0; c3 <= 1023; c3 += 1) +; CHECK: Stmt_for_body3(c1, c3); +; CHECK: } else +; CHECK: #pragma simd reduction +; CHECK: for (int c3 = 0; c3 <= 1023; c3 += 1) +; CHECK: Stmt_for_body3(c1, c3); +; CHECK: } +; +; void rmsmd2(int *A, long n) { +; for (long i = 0; i < 2 * n; i++) +; for (long j = 0; j < 1024; j++) +; A[i] += i; +; } +; +target datalayout = "e-m:e-p:32:32-i64:64-v128:64:128-n32-S64" + +define void @rmsmd2(i32* %A, i32 %n) { +entry: + br label %for.cond + +for.cond: ; preds = %for.inc4, %entry + %i.0 = phi i32 [ 0, %entry ], [ %inc5, %for.inc4 ] + %mul = shl nsw i32 %n, 1 + %cmp = icmp slt i32 %i.0, %mul + br i1 %cmp, label %for.body, label %for.end6 + +for.body: ; preds = %for.cond + br label %for.cond1 + +for.cond1: ; preds = %for.inc, %for.body + %j.0 = phi i32 [ 0, %for.body ], [ %inc, %for.inc ] + %exitcond = 
icmp ne i32 %j.0, 1024 + br i1 %exitcond, label %for.body3, label %for.end + +for.body3: ; preds = %for.cond1 + %arrayidx = getelementptr inbounds i32* %A, i32 %i.0 + %tmp = load i32* %arrayidx, align 4 + %add = add nsw i32 %tmp, %i.0 + store i32 %add, i32* %arrayidx, align 4 + br label %for.inc + +for.inc: ; preds = %for.body3 + %inc = add nsw i32 %j.0, 1 + br label %for.cond1 + +for.end: ; preds = %for.cond1 + br label %for.inc4 + +for.inc4: ; preds = %for.end + %inc5 = add nsw i32 %i.0, 1 + br label %for.cond + +for.end6: ; preds = %for.cond + ret void +} diff --git a/polly/test/Isl/Ast/reduction_modulo_schedule_multiple_dimensions_3.ll b/polly/test/Isl/Ast/reduction_modulo_schedule_multiple_dimensions_3.ll new file mode 100644 index 00000000000..0a544d2d364 --- /dev/null +++ b/polly/test/Isl/Ast/reduction_modulo_schedule_multiple_dimensions_3.ll @@ -0,0 +1,62 @@ +; RUN: opt %loadPolly -polly-import-jscop-dir=%S -polly-import-jscop -polly-ast -polly-ast-detect-parallel -analyze < %s | FileCheck %s +; +; Verify that the outer dimension doesnt't carry reduction dependences +; +; CHECK-NOT:#pragma omp parallel for reduction +; CHECK: #pragma omp parallel for +; CHECK: for (int c1 = 0; c1 < 2 * n; c1 += 1) +; CHECK: #pragma simd reduction +; CHECK: for (int c3 = 0; c3 <= 1023; c3 += 1) { +; CHECK: if (c3 % 2 == 0) { +; CHECK: Stmt_for_body3(c1, c3); +; CHECK: } else +; CHECK: Stmt_for_body3(c1, c3); +; CHECK: } +; +; void rmsmd3(int *A, long n) { +; for (long i = 0; i < 2 * n; i++) +; for (long j = 0; j < 1024; j++) +; A[i] += i; +; } +; +target datalayout = "e-m:e-p:32:32-i64:64-v128:64:128-n32-S64" + +define void @rmsmd3(i32* %A, i32 %n) { +entry: + br label %for.cond + +for.cond: ; preds = %for.inc4, %entry + %i.0 = phi i32 [ 0, %entry ], [ %inc5, %for.inc4 ] + %mul = shl nsw i32 %n, 1 + %cmp = icmp slt i32 %i.0, %mul + br i1 %cmp, label %for.body, label %for.end6 + +for.body: ; preds = %for.cond + br label %for.cond1 + +for.cond1: ; preds = %for.inc, %for.body + 
%j.0 = phi i32 [ 0, %for.body ], [ %inc, %for.inc ] + %exitcond = icmp ne i32 %j.0, 1024 + br i1 %exitcond, label %for.body3, label %for.end + +for.body3: ; preds = %for.cond1 + %arrayidx = getelementptr inbounds i32* %A, i32 %i.0 + %tmp = load i32* %arrayidx, align 4 + %add = add nsw i32 %tmp, %i.0 + store i32 %add, i32* %arrayidx, align 4 + br label %for.inc + +for.inc: ; preds = %for.body3 + %inc = add nsw i32 %j.0, 1 + br label %for.cond1 + +for.end: ; preds = %for.cond1 + br label %for.inc4 + +for.inc4: ; preds = %for.end + %inc5 = add nsw i32 %i.0, 1 + br label %for.cond + +for.end6: ; preds = %for.cond + ret void +} diff --git a/polly/test/Isl/Ast/reduction_modulo_schedule_multiple_dimensions_4.ll b/polly/test/Isl/Ast/reduction_modulo_schedule_multiple_dimensions_4.ll new file mode 100644 index 00000000000..8516207125c --- /dev/null +++ b/polly/test/Isl/Ast/reduction_modulo_schedule_multiple_dimensions_4.ll @@ -0,0 +1,62 @@ +; RUN: opt %loadPolly -polly-import-jscop-dir=%S -polly-import-jscop -polly-ast -polly-ast-detect-parallel -analyze < %s | FileCheck %s +; +; Verify that the outer dimension doesnt't carry reduction dependences +; +; CHECK-NOT:#pragma omp parallel for reduction +; CHECK: #pragma omp parallel for +; CHECK: for (int c1 = 0; c1 < 2 * n; c1 += 1) +; CHECK: #pragma simd reduction +; CHECK: for (int c3 = -1022; c3 <= 1023; c3 += 1) { +; CHECK: if ((c3 + 1022) % 2 == 0 && c3 <= 0) { +; CHECK: Stmt_for_body3(c1, -c3); +; CHECK: } else if ((c3 + 1023) % 2 == 0 && c3 >= 1) +; CHECK: Stmt_for_body3(c1, c3); +; CHECK: } +; +; void rmsmd4(int *A, long n) { +; for (long i = 0; i < 2 * n; i++) +; for (long j = 0; j < 1024; j++) +; A[i] += i; +; } +; +target datalayout = "e-m:e-p:32:32-i64:64-v128:64:128-n32-S64" + +define void @rmsmd4(i32* %A, i32 %n) { +entry: + br label %for.cond + +for.cond: ; preds = %for.inc4, %entry + %i.0 = phi i32 [ 0, %entry ], [ %inc5, %for.inc4 ] + %mul = shl nsw i32 %n, 1 + %cmp = icmp slt i32 %i.0, %mul + br i1 %cmp, label 
%for.body, label %for.end6 + +for.body: ; preds = %for.cond + br label %for.cond1 + +for.cond1: ; preds = %for.inc, %for.body + %j.0 = phi i32 [ 0, %for.body ], [ %inc, %for.inc ] + %exitcond = icmp ne i32 %j.0, 1024 + br i1 %exitcond, label %for.body3, label %for.end + +for.body3: ; preds = %for.cond1 + %arrayidx = getelementptr inbounds i32* %A, i32 %i.0 + %tmp = load i32* %arrayidx, align 4 + %add = add nsw i32 %tmp, %i.0 + store i32 %add, i32* %arrayidx, align 4 + br label %for.inc + +for.inc: ; preds = %for.body3 + %inc = add nsw i32 %j.0, 1 + br label %for.cond1 + +for.end: ; preds = %for.cond1 + br label %for.inc4 + +for.inc4: ; preds = %for.end + %inc5 = add nsw i32 %i.0, 1 + br label %for.cond + +for.end6: ; preds = %for.cond + ret void +} diff --git a/polly/test/Isl/Ast/reduction_modulo_schedule_multiple_dimensions_5.ll b/polly/test/Isl/Ast/reduction_modulo_schedule_multiple_dimensions_5.ll new file mode 100644 index 00000000000..a1e00b8eefe --- /dev/null +++ b/polly/test/Isl/Ast/reduction_modulo_schedule_multiple_dimensions_5.ll @@ -0,0 +1,65 @@ +; RUN: opt %loadPolly -polly-import-jscop-dir=%S -polly-import-jscop -polly-ast -polly-ast-detect-parallel -analyze < %s | FileCheck %s +; +; Verify that only the outer dimension needs privatization +; +; CHECK: #pragma omp parallel for reduction +; CHECK: for (int c1 = 0; c1 <= 1023; c1 += 1) { +; CHECK: if (c1 % 2 == 0) { +; CHECK-NOT: #pragma simd reduction +; CHECK: #pragma simd +; CHECK: for (int c3 = 0; c3 < 2 * n; c3 += 1) +; CHECK: Stmt_for_body3(c3, c1); +; CHECK: } else +; CHECK-NOT: #pragma simd reduction +; CHECK: #pragma simd +; CHECK: for (int c3 = -2 * n + 1; c3 <= 0; c3 += 1) +; CHECK: Stmt_for_body3(-c3, c1); +; CHECK: } +; +; void rmsmd5(int *A, long n) { +; for (long i = 0; i < 2 * n; i++) +; for (long j = 0; j < 1024; j++) +; A[i] += i; +; } +; +target datalayout = "e-m:e-p:32:32-i64:64-v128:64:128-n32-S64" + +define void @rmsmd5(i32* %A, i32 %n) { +entry: + br label %for.cond + +for.cond: ; 
preds = %for.inc4, %entry + %i.0 = phi i32 [ 0, %entry ], [ %inc5, %for.inc4 ] + %mul = shl nsw i32 %n, 1 + %cmp = icmp slt i32 %i.0, %mul + br i1 %cmp, label %for.body, label %for.end6 + +for.body: ; preds = %for.cond + br label %for.cond1 + +for.cond1: ; preds = %for.inc, %for.body + %j.0 = phi i32 [ 0, %for.body ], [ %inc, %for.inc ] + %exitcond = icmp ne i32 %j.0, 1024 + br i1 %exitcond, label %for.body3, label %for.end + +for.body3: ; preds = %for.cond1 + %arrayidx = getelementptr inbounds i32* %A, i32 %i.0 + %tmp = load i32* %arrayidx, align 4 + %add = add nsw i32 %tmp, %i.0 + store i32 %add, i32* %arrayidx, align 4 + br label %for.inc + +for.inc: ; preds = %for.body3 + %inc = add nsw i32 %j.0, 1 + br label %for.cond1 + +for.end: ; preds = %for.cond1 + br label %for.inc4 + +for.inc4: ; preds = %for.end + %inc5 = add nsw i32 %i.0, 1 + br label %for.cond + +for.end6: ; preds = %for.cond + ret void +} diff --git a/polly/test/Isl/Ast/reduction_multiple_dimensions.ll b/polly/test/Isl/Ast/reduction_multiple_dimensions.ll new file mode 100644 index 00000000000..98520dc3eed --- /dev/null +++ b/polly/test/Isl/Ast/reduction_multiple_dimensions.ll @@ -0,0 +1,72 @@ +; RUN: opt %loadPolly -polly-ast -polly-ast-detect-parallel -analyze < %s | FileCheck %s +; +; CHECK-NOT:#pragma omp parallel for reduction +; CHECK: #pragma omp parallel for +; CHECK: for (int c1 = 0; c1 <= 2047; c1 += 1) +; CHECK: for (int c3 = 0; c3 <= 1023; c3 += 1) +; CHECK: #pragma simd reduction +; CHECK: for (int c5 = 0; c5 <= 511; c5 += 1) +; CHECK: Stmt_for_body6(c1, c3, c5); +; +; void rmd(int *A) { +; for (long i = 0; i < 2048; i++) +; for (long j = 0; j < 1024; j++) +; for (long k = 0; k < 512; k++) +; A[i] += i; +; } +; +target datalayout = "e-m:e-p:32:32-i64:64-v128:64:128-n32-S64" + +define void @rmd(i32* %A) { +entry: + br label %for.cond + +for.cond: ; preds = %for.inc10, %entry + %i.0 = phi i32 [ 0, %entry ], [ %inc11, %for.inc10 ] + %exitcond2 = icmp ne i32 %i.0, 2048 + br i1 %exitcond2, 
label %for.body, label %for.end12 + +for.body: ; preds = %for.cond + br label %for.cond1 + +for.cond1: ; preds = %for.inc7, %for.body + %j.0 = phi i32 [ 0, %for.body ], [ %inc8, %for.inc7 ] + %exitcond1 = icmp ne i32 %j.0, 1024 + br i1 %exitcond1, label %for.body3, label %for.end9 + +for.body3: ; preds = %for.cond1 + br label %for.cond4 + +for.cond4: ; preds = %for.inc, %for.body3 + %k.0 = phi i32 [ 0, %for.body3 ], [ %inc, %for.inc ] + %exitcond = icmp ne i32 %k.0, 512 + br i1 %exitcond, label %for.body6, label %for.end + +for.body6: ; preds = %for.cond4 + %arrayidx = getelementptr inbounds i32* %A, i32 %i.0 + %tmp = load i32* %arrayidx, align 4 + %add = add nsw i32 %tmp, %i.0 + store i32 %add, i32* %arrayidx, align 4 + br label %for.inc + +for.inc: ; preds = %for.body6 + %inc = add nsw i32 %k.0, 1 + br label %for.cond4 + +for.end: ; preds = %for.cond4 + br label %for.inc7 + +for.inc7: ; preds = %for.end + %inc8 = add nsw i32 %j.0, 1 + br label %for.cond1 + +for.end9: ; preds = %for.cond1 + br label %for.inc10 + +for.inc10: ; preds = %for.end9 + %inc11 = add nsw i32 %i.0, 1 + br label %for.cond + +for.end12: ; preds = %for.cond + ret void +} diff --git a/polly/test/Isl/Ast/reduction_multiple_dimensions_2.ll b/polly/test/Isl/Ast/reduction_multiple_dimensions_2.ll new file mode 100644 index 00000000000..af63bc7d325 --- /dev/null +++ b/polly/test/Isl/Ast/reduction_multiple_dimensions_2.ll @@ -0,0 +1,72 @@ +; RUN: opt %loadPolly -polly-ast -polly-ast-detect-parallel -analyze < %s | FileCheck %s +; +; CHECK-NOT:#pragma omp parallel for reduction +; CHECK: #pragma omp parallel for +; CHECK: for (int c1 = 0; c1 <= 2047; c1 += 1) +; CHECK: for (int c3 = 0; c3 <= 1023; c3 += 1) +; CHECK: #pragma simd reduction +; CHECK: for (int c5 = 0; c5 <= 511; c5 += 1) +; CHECK: Stmt_for_body6(c1, c3, c5); +; +; void rmd2(int *A) { +; for (long i = 0; i < 2048; i++) +; for (long j = 0; j < 1024; j++) +; for (long k = 0; k < 512; k++) +; A[i] += i; +; } +; +target datalayout = 
"e-m:e-p:32:32-i64:64-v128:64:128-n32-S64" + +define void @rmd2(i32* %A) { +entry: + br label %for.cond + +for.cond: ; preds = %for.inc10, %entry + %i.0 = phi i32 [ 0, %entry ], [ %inc11, %for.inc10 ] + %exitcond2 = icmp ne i32 %i.0, 2048 + br i1 %exitcond2, label %for.body, label %for.end12 + +for.body: ; preds = %for.cond + br label %for.cond1 + +for.cond1: ; preds = %for.inc7, %for.body + %j.0 = phi i32 [ 0, %for.body ], [ %inc8, %for.inc7 ] + %exitcond1 = icmp ne i32 %j.0, 1024 + br i1 %exitcond1, label %for.body3, label %for.end9 + +for.body3: ; preds = %for.cond1 + br label %for.cond4 + +for.cond4: ; preds = %for.inc, %for.body3 + %k.0 = phi i32 [ 0, %for.body3 ], [ %inc, %for.inc ] + %exitcond = icmp ne i32 %k.0, 512 + br i1 %exitcond, label %for.body6, label %for.end + +for.body6: ; preds = %for.cond4 + %arrayidx = getelementptr inbounds i32* %A, i32 %i.0 + %tmp = load i32* %arrayidx, align 4 + %add = add nsw i32 %tmp, %i.0 + store i32 %add, i32* %arrayidx, align 4 + br label %for.inc + +for.inc: ; preds = %for.body6 + %inc = add nsw i32 %k.0, 1 + br label %for.cond4 + +for.end: ; preds = %for.cond4 + br label %for.inc7 + +for.inc7: ; preds = %for.end + %inc8 = add nsw i32 %j.0, 1 + br label %for.cond1 + +for.end9: ; preds = %for.cond1 + br label %for.inc10 + +for.inc10: ; preds = %for.end9 + %inc11 = add nsw i32 %i.0, 1 + br label %for.cond + +for.end12: ; preds = %for.cond + ret void +} diff --git a/polly/test/Isl/Ast/reduction_multiple_dimensions_3.ll b/polly/test/Isl/Ast/reduction_multiple_dimensions_3.ll new file mode 100644 index 00000000000..13ae1a193b9 --- /dev/null +++ b/polly/test/Isl/Ast/reduction_multiple_dimensions_3.ll @@ -0,0 +1,72 @@ +; RUN: opt %loadPolly -polly-ast -polly-ast-detect-parallel -analyze < %s | FileCheck %s +; +; CHECK-NOT:#pragma omp parallel for reduction +; CHECK: #pragma omp parallel for +; CHECK: for (int c1 = 0; c1 <= 2047; c1 += 1) +; CHECK: for (int c3 = 0; c3 <= 1023; c3 += 1) +; CHECK: #pragma simd reduction +; 
CHECK: for (int c5 = 0; c5 <= 511; c5 += 1) +; CHECK: Stmt_for_body6(c1, c3, c5); +; +; void rmd3(int *A) { +; for (long i = 0; i < 2048; i++) +; for (long j = 0; j < 1024; j++) +; for (long k = 0; k < 512; k++) +; A[i] += i; +; } +; +target datalayout = "e-m:e-p:32:32-i64:64-v128:64:128-n32-S64" + +define void @rmd3(i32* %A) { +entry: + br label %for.cond + +for.cond: ; preds = %for.inc10, %entry + %i.0 = phi i32 [ 0, %entry ], [ %inc11, %for.inc10 ] + %exitcond2 = icmp ne i32 %i.0, 2048 + br i1 %exitcond2, label %for.body, label %for.end12 + +for.body: ; preds = %for.cond + br label %for.cond1 + +for.cond1: ; preds = %for.inc7, %for.body + %j.0 = phi i32 [ 0, %for.body ], [ %inc8, %for.inc7 ] + %exitcond1 = icmp ne i32 %j.0, 1024 + br i1 %exitcond1, label %for.body3, label %for.end9 + +for.body3: ; preds = %for.cond1 + br label %for.cond4 + +for.cond4: ; preds = %for.inc, %for.body3 + %k.0 = phi i32 [ 0, %for.body3 ], [ %inc, %for.inc ] + %exitcond = icmp ne i32 %k.0, 512 + br i1 %exitcond, label %for.body6, label %for.end + +for.body6: ; preds = %for.cond4 + %arrayidx = getelementptr inbounds i32* %A, i32 %i.0 + %tmp = load i32* %arrayidx, align 4 + %add = add nsw i32 %tmp, %i.0 + store i32 %add, i32* %arrayidx, align 4 + br label %for.inc + +for.inc: ; preds = %for.body6 + %inc = add nsw i32 %k.0, 1 + br label %for.cond4 + +for.end: ; preds = %for.cond4 + br label %for.inc7 + +for.inc7: ; preds = %for.end + %inc8 = add nsw i32 %j.0, 1 + br label %for.cond1 + +for.end9: ; preds = %for.cond1 + br label %for.inc10 + +for.inc10: ; preds = %for.end9 + %inc11 = add nsw i32 %i.0, 1 + br label %for.cond + +for.end12: ; preds = %for.cond + ret void +} diff --git a/polly/test/Isl/Ast/reduction_multiple_dimensions_4.ll b/polly/test/Isl/Ast/reduction_multiple_dimensions_4.ll new file mode 100644 index 00000000000..1a8f703a1bd --- /dev/null +++ b/polly/test/Isl/Ast/reduction_multiple_dimensions_4.ll @@ -0,0 +1,72 @@ +; RUN: opt %loadPolly -polly-ast 
-polly-ast-detect-parallel -analyze < %s | FileCheck %s +; +; CHECK-NOT:#pragma omp parallel for reduction +; CHECK: #pragma omp parallel for +; CHECK: for (int c1 = 0; c1 <= 2047; c1 += 1) +; CHECK: for (int c3 = 0; c3 <= 1023; c3 += 1) +; CHECK: #pragma simd reduction +; CHECK: for (int c5 = 0; c5 <= 511; c5 += 1) +; CHECK: Stmt_for_body6(c1, c3, c5); +; +; void rmd4(int *A) { +; for (long i = 0; i < 2048; i++) +; for (long j = 0; j < 1024; j++) +; for (long k = 0; k < 512; k++) +; A[i] += i; +; } +; +target datalayout = "e-m:e-p:32:32-i64:64-v128:64:128-n32-S64" + +define void @rmd4(i32* %A) { +entry: + br label %for.cond + +for.cond: ; preds = %for.inc10, %entry + %i.0 = phi i32 [ 0, %entry ], [ %inc11, %for.inc10 ] + %exitcond2 = icmp ne i32 %i.0, 2048 + br i1 %exitcond2, label %for.body, label %for.end12 + +for.body: ; preds = %for.cond + br label %for.cond1 + +for.cond1: ; preds = %for.inc7, %for.body + %j.0 = phi i32 [ 0, %for.body ], [ %inc8, %for.inc7 ] + %exitcond1 = icmp ne i32 %j.0, 1024 + br i1 %exitcond1, label %for.body3, label %for.end9 + +for.body3: ; preds = %for.cond1 + br label %for.cond4 + +for.cond4: ; preds = %for.inc, %for.body3 + %k.0 = phi i32 [ 0, %for.body3 ], [ %inc, %for.inc ] + %exitcond = icmp ne i32 %k.0, 512 + br i1 %exitcond, label %for.body6, label %for.end + +for.body6: ; preds = %for.cond4 + %arrayidx = getelementptr inbounds i32* %A, i32 %i.0 + %tmp = load i32* %arrayidx, align 4 + %add = add nsw i32 %tmp, %i.0 + store i32 %add, i32* %arrayidx, align 4 + br label %for.inc + +for.inc: ; preds = %for.body6 + %inc = add nsw i32 %k.0, 1 + br label %for.cond4 + +for.end: ; preds = %for.cond4 + br label %for.inc7 + +for.inc7: ; preds = %for.end + %inc8 = add nsw i32 %j.0, 1 + br label %for.cond1 + +for.end9: ; preds = %for.cond1 + br label %for.inc10 + +for.inc10: ; preds = %for.end9 + %inc11 = add nsw i32 %i.0, 1 + br label %for.cond + +for.end12: ; preds = %for.cond + ret void +} diff --git 
a/polly/test/Isl/Ast/rlr___%for.cond---%for.end10.jscop b/polly/test/Isl/Ast/rlr___%for.cond---%for.end10.jscop new file mode 100644 index 00000000000..fc50d5c6987 --- /dev/null +++ b/polly/test/Isl/Ast/rlr___%for.cond---%for.end10.jscop @@ -0,0 +1,32 @@ +{ + "context" : "[n] -> { : n >= -2147483648 and n <= 2147483647 }", + "name" : "for.cond => for.end10", + "statements" : [ + { + "accesses" : [ + { + "kind" : "read", + "relation" : "[n] -> { Stmt_S0[i0] -> MemRef_A[0] }" + }, + { + "kind" : "write", + "relation" : "[n] -> { Stmt_S0[i0] -> MemRef_A[0] }" + } + ], + "domain" : "[n] -> { Stmt_S0[i0] : i0 >= 0 and i0 <= -1 + 2n and n >= 1 }", + "name" : "Stmt_S0", + "schedule" : "[n] -> { Stmt_S0[i0] -> scattering[0, n - i0, 0] }" + }, + { + "accesses" : [ + { + "kind" : "write", + "relation" : "[n] -> { Stmt_S1[i0] -> MemRef_A[1 + i0] }" + } + ], + "domain" : "[n] -> { Stmt_S1[i0] : i0 >= 0 and i0 <= -1 + 2n and n >= 1 }", + "name" : "Stmt_S1", + "schedule" : "[n] -> { Stmt_S1[i0] -> scattering[1, n - i0, 0] }" + } + ] +} diff --git a/polly/test/Isl/Ast/rmalrs2___%for.cond---%for.end10.jscop b/polly/test/Isl/Ast/rmalrs2___%for.cond---%for.end10.jscop new file mode 100644 index 00000000000..c84f0a0c889 --- /dev/null +++ b/polly/test/Isl/Ast/rmalrs2___%for.cond---%for.end10.jscop @@ -0,0 +1,32 @@ +{ + "context" : "[n] -> { : n >= -2147483648 and n <= 2147483647 }", + "name" : "for.cond => for.end10", + "statements" : [ + { + "accesses" : [ + { + "kind" : "read", + "relation" : "[n] -> { Stmt_S0[i0] -> MemRef_A[0] }" + }, + { + "kind" : "write", + "relation" : "[n] -> { Stmt_S0[i0] -> MemRef_A[0] }" + } + ], + "domain" : "[n] -> { Stmt_S0[i0] : i0 >= 0 and i0 <= -1 + 2n and n >= 1 }", + "name" : "Stmt_S0", + "schedule" : "[n] -> { Stmt_S0[i0] -> scattering[0, -i0, 0]: i0 % 2 = 0; Stmt_S0[i0] -> scattering[2, i0, 0]: i0 % 2 = 1 }" + }, + { + "accesses" : [ + { + "kind" : "write", + "relation" : "[n] -> { Stmt_S1[i0] -> MemRef_A[1 + i0] }" + } + ], + "domain" : "[n] -> 
{ Stmt_S1[i0] : i0 >= 0 and i0 <= -1 + 2n and n >= 1 }", + "name" : "Stmt_S1", + "schedule" : "[n] -> { Stmt_S1[i0] -> scattering[1, i0, 0] }" + } + ] +} diff --git a/polly/test/Isl/Ast/rmalrs___%for.cond---%for.end10.jscop b/polly/test/Isl/Ast/rmalrs___%for.cond---%for.end10.jscop new file mode 100644 index 00000000000..9740b5758ea --- /dev/null +++ b/polly/test/Isl/Ast/rmalrs___%for.cond---%for.end10.jscop @@ -0,0 +1,32 @@ +{ + "context" : "[n] -> { : n >= -2147483648 and n <= 2147483647 }", + "name" : "for.cond => for.end10", + "statements" : [ + { + "accesses" : [ + { + "kind" : "read", + "relation" : "[n] -> { Stmt_S0[i0] -> MemRef_A[0] }" + }, + { + "kind" : "write", + "relation" : "[n] -> { Stmt_S0[i0] -> MemRef_A[0] }" + } + ], + "domain" : "[n] -> { Stmt_S0[i0] : i0 >= 0 and i0 <= -1 + 2n and n >= 1 }", + "name" : "Stmt_S0", + "schedule" : "[n] -> { Stmt_S0[i0] -> scattering[0, 2 * n - i0, 0]: i0 % 2 = 0; Stmt_S0[i0] -> scattering[2, 2 * n - i0, 0]: i0 % 2 = 1 }" + }, + { + "accesses" : [ + { + "kind" : "write", + "relation" : "[n] -> { Stmt_S1[i0] -> MemRef_A[1 + i0] }" + } + ], + "domain" : "[n] -> { Stmt_S1[i0] : i0 >= 0 and i0 <= -1 + 2n and n >= 1 }", + "name" : "Stmt_S1", + "schedule" : "[n] -> { Stmt_S1[i0] -> scattering[1, i0, 0] }" + } + ] +} diff --git a/polly/test/Isl/Ast/rmd2___%for.cond---%for.end12.jscop b/polly/test/Isl/Ast/rmd2___%for.cond---%for.end12.jscop new file mode 100644 index 00000000000..bca97934e20 --- /dev/null +++ b/polly/test/Isl/Ast/rmd2___%for.cond---%for.end12.jscop @@ -0,0 +1,21 @@ +{ + "context" : "{ : }", + "name" : "for.cond => for.end12", + "statements" : [ + { + "accesses" : [ + { + "kind" : "read", + "relation" : "{ Stmt_for_body6[i0, i1, i2] -> MemRef_A[i0] }" + }, + { + "kind" : "write", + "relation" : "{ Stmt_for_body6[i0, i1, i2] -> MemRef_A[i0] }" + } + ], + "domain" : "{ Stmt_for_body6[i0, i1, i2] : i0 >= 0 and i0 <= 2047 and i1 >= 0 and i1 <= 1023 and i2 >= 0 and i2 <= 511 }", + "name" : "Stmt_for_body6", + 
"schedule" : "{ Stmt_for_body6[i0, i1, i2] -> scattering[0, i1, 0, i0, 0, i2, 0] }" + } + ] +} diff --git a/polly/test/Isl/Ast/rmd3___%for.cond---%for.end12.jscop b/polly/test/Isl/Ast/rmd3___%for.cond---%for.end12.jscop new file mode 100644 index 00000000000..7cd7570a386 --- /dev/null +++ b/polly/test/Isl/Ast/rmd3___%for.cond---%for.end12.jscop @@ -0,0 +1,21 @@ +{ + "context" : "{ : }", + "name" : "for.cond => for.end12", + "statements" : [ + { + "accesses" : [ + { + "kind" : "read", + "relation" : "{ Stmt_for_body6[i0, i1, i2] -> MemRef_A[i0] }" + }, + { + "kind" : "write", + "relation" : "{ Stmt_for_body6[i0, i1, i2] -> MemRef_A[i0] }" + } + ], + "domain" : "{ Stmt_for_body6[i0, i1, i2] : i0 >= 0 and i0 <= 2047 and i1 >= 0 and i1 <= 1023 and i2 >= 0 and i2 <= 511 }", + "name" : "Stmt_for_body6", + "schedule" : "{ Stmt_for_body6[i0, i1, i2] -> scattering[0, i2, 0, i1, 0, i0, 0] }" + } + ] +} diff --git a/polly/test/Isl/Ast/rmd4___%for.cond---%for.end12.jscop b/polly/test/Isl/Ast/rmd4___%for.cond---%for.end12.jscop new file mode 100644 index 00000000000..b29c322730b --- /dev/null +++ b/polly/test/Isl/Ast/rmd4___%for.cond---%for.end12.jscop @@ -0,0 +1,21 @@ +{ + "context" : "{ : }", + "name" : "for.cond => for.end12", + "statements" : [ + { + "accesses" : [ + { + "kind" : "read", + "relation" : "{ Stmt_for_body6[i0, i1, i2] -> MemRef_A[i0] }" + }, + { + "kind" : "write", + "relation" : "{ Stmt_for_body6[i0, i1, i2] -> MemRef_A[i0] }" + } + ], + "domain" : "{ Stmt_for_body6[i0, i1, i2] : i0 >= 0 and i0 <= 2047 and i1 >= 0 and i1 <= 1023 and i2 >= 0 and i2 <= 511 }", + "name" : "Stmt_for_body6", + "schedule" : "{ Stmt_for_body6[i0, i1, i2] -> scattering[0, i2, 0, i0, 0, i1, 0] }" + } + ] +} diff --git a/polly/test/Isl/Ast/rmd___%for.cond---%for.end12.jscop b/polly/test/Isl/Ast/rmd___%for.cond---%for.end12.jscop new file mode 100644 index 00000000000..be7cdc2726a --- /dev/null +++ b/polly/test/Isl/Ast/rmd___%for.cond---%for.end12.jscop @@ -0,0 +1,21 @@ +{ + "context" : 
"{ : }", + "name" : "for.cond => for.end12", + "statements" : [ + { + "accesses" : [ + { + "kind" : "read", + "relation" : "{ Stmt_for_body6[i0, i1, i2] -> MemRef_A[i0] }" + }, + { + "kind" : "write", + "relation" : "{ Stmt_for_body6[i0, i1, i2] -> MemRef_A[i0] }" + } + ], + "domain" : "{ Stmt_for_body6[i0, i1, i2] : i0 >= 0 and i0 <= 2047 and i1 >= 0 and i1 <= 1023 and i2 >= 0 and i2 <= 511 }", + "name" : "Stmt_for_body6", + "schedule" : "{ Stmt_for_body6[i0, i1, i2] -> scattering[0, i0, 0, i1, 0, i2, 0] }" + } + ] +} diff --git a/polly/test/Isl/Ast/rms___%for.cond---%for.end10.jscop b/polly/test/Isl/Ast/rms___%for.cond---%for.end10.jscop new file mode 100644 index 00000000000..15a6930b81c --- /dev/null +++ b/polly/test/Isl/Ast/rms___%for.cond---%for.end10.jscop @@ -0,0 +1,32 @@ +{ + "context" : "[n] -> { : n >= -2147483648 and n <= 2147483647 }", + "name" : "for.cond => for.end10", + "statements" : [ + { + "accesses" : [ + { + "kind" : "read", + "relation" : "[n] -> { Stmt_S0[i0] -> MemRef_A[0] }" + }, + { + "kind" : "write", + "relation" : "[n] -> { Stmt_S0[i0] -> MemRef_A[0] }" + } + ], + "domain" : "[n] -> { Stmt_S0[i0] : i0 >= 0 and i0 <= -1 + 2n and n >= 1 }", + "name" : "Stmt_S0", + "schedule" : "[n] -> { Stmt_S0[i0] -> scattering[0, i0, 0]: i0 % 2 = 0; Stmt_S0[i0] -> scattering[2, i0, 0]: i0 % 2 = 1 }" + }, + { + "accesses" : [ + { + "kind" : "write", + "relation" : "[n] -> { Stmt_S1[i0] -> MemRef_A[1 + i0] }" + } + ], + "domain" : "[n] -> { Stmt_S1[i0] : i0 >= 0 and i0 <= -1 + 2n and n >= 1 }", + "name" : "Stmt_S1", + "schedule" : "[n] -> { Stmt_S1[i0] -> scattering[1, i0, 0] }" + } + ] +} diff --git a/polly/test/Isl/Ast/rmsmd2___%for.cond---%for.end6.jscop b/polly/test/Isl/Ast/rmsmd2___%for.cond---%for.end6.jscop new file mode 100644 index 00000000000..38cb44bce1f --- /dev/null +++ b/polly/test/Isl/Ast/rmsmd2___%for.cond---%for.end6.jscop @@ -0,0 +1,21 @@ +{ + "context" : "[n] -> { : n >= -2147483648 and n <= 2147483647 }", + "name" : "for.cond => 
for.end6", + "statements" : [ + { + "accesses" : [ + { + "kind" : "read", + "relation" : "[n] -> { Stmt_for_body3[i0, i1] -> MemRef_A[i0] }" + }, + { + "kind" : "write", + "relation" : "[n] -> { Stmt_for_body3[i0, i1] -> MemRef_A[i0] }" + } + ], + "domain" : "[n] -> { Stmt_for_body3[i0, i1] : i0 >= 0 and i0 <= -1 + 2n and n >= 1 and i1 >= 0 and i1 <= 1023 }", + "name" : "Stmt_for_body3", + "schedule" : "[n] -> { Stmt_for_body3[i0, i1] -> scattering[0, i0, 0, i1, 0]: i0 % 2 = 0; Stmt_for_body3[i0, i1] -> scattering[0, i0, 1, i1, 0]: i0 % 2 = 1 }" + } + ] +} diff --git a/polly/test/Isl/Ast/rmsmd3___%for.cond---%for.end6.jscop b/polly/test/Isl/Ast/rmsmd3___%for.cond---%for.end6.jscop new file mode 100644 index 00000000000..44d71a3e29e --- /dev/null +++ b/polly/test/Isl/Ast/rmsmd3___%for.cond---%for.end6.jscop @@ -0,0 +1,21 @@ +{ + "context" : "[n] -> { : n >= -2147483648 and n <= 2147483647 }", + "name" : "for.cond => for.end6", + "statements" : [ + { + "accesses" : [ + { + "kind" : "read", + "relation" : "[n] -> { Stmt_for_body3[i0, i1] -> MemRef_A[i0] }" + }, + { + "kind" : "write", + "relation" : "[n] -> { Stmt_for_body3[i0, i1] -> MemRef_A[i0] }" + } + ], + "domain" : "[n] -> { Stmt_for_body3[i0, i1] : i0 >= 0 and i0 <= -1 + 2n and n >= 1 and i1 >= 0 and i1 <= 1023 }", + "name" : "Stmt_for_body3", + "schedule" : "[n] -> { Stmt_for_body3[i0, i1] -> scattering[0, i0, 0, i1, 0]: i1 % 2 = 0; Stmt_for_body3[i0, i1] -> scattering[0, i0, 0, i1, 1]: i1 % 2 = 1 }" + } + ] +} diff --git a/polly/test/Isl/Ast/rmsmd4___%for.cond---%for.end6.jscop b/polly/test/Isl/Ast/rmsmd4___%for.cond---%for.end6.jscop new file mode 100644 index 00000000000..9bfd4aa944c --- /dev/null +++ b/polly/test/Isl/Ast/rmsmd4___%for.cond---%for.end6.jscop @@ -0,0 +1,21 @@ +{ + "context" : "[n] -> { : n >= -2147483648 and n <= 2147483647 }", + "name" : "for.cond => for.end6", + "statements" : [ + { + "accesses" : [ + { + "kind" : "read", + "relation" : "[n] -> { Stmt_for_body3[i0, i1] -> MemRef_A[i0] }" 
+ }, + { + "kind" : "write", + "relation" : "[n] -> { Stmt_for_body3[i0, i1] -> MemRef_A[i0] }" + } + ], + "domain" : "[n] -> { Stmt_for_body3[i0, i1] : i0 >= 0 and i0 <= -1 + 2n and n >= 1 and i1 >= 0 and i1 <= 1023 }", + "name" : "Stmt_for_body3", + "schedule" : "[n] -> { Stmt_for_body3[i0, i1] -> scattering[0, i0, 0, -i1, 0]: i1 % 2 = 0; Stmt_for_body3[i0, i1] -> scattering[0, i0, 0, i1, 1]: i1 % 2 = 1 }" + } + ] +} diff --git a/polly/test/Isl/Ast/rmsmd5___%for.cond---%for.end6.jscop b/polly/test/Isl/Ast/rmsmd5___%for.cond---%for.end6.jscop new file mode 100644 index 00000000000..ee1c3dd39ef --- /dev/null +++ b/polly/test/Isl/Ast/rmsmd5___%for.cond---%for.end6.jscop @@ -0,0 +1,21 @@ +{ + "context" : "[n] -> { : n >= -2147483648 and n <= 2147483647 }", + "name" : "for.cond => for.end6", + "statements" : [ + { + "accesses" : [ + { + "kind" : "read", + "relation" : "[n] -> { Stmt_for_body3[i0, i1] -> MemRef_A[i0] }" + }, + { + "kind" : "write", + "relation" : "[n] -> { Stmt_for_body3[i0, i1] -> MemRef_A[i0] }" + } + ], + "domain" : "[n] -> { Stmt_for_body3[i0, i1] : i0 >= 0 and i0 <= -1 + 2n and n >= 1 and i1 >= 0 and i1 <= 1023 }", + "name" : "Stmt_for_body3", + "schedule" : "[n] -> { Stmt_for_body3[i0, i1] -> scattering[0, i1, 0, i0, 0]: i1 % 2 = 0; Stmt_for_body3[i0, i1] -> scattering[0, i1, 1, -i0, 0]: i1 % 2 = 1 }" + } + ] +} diff --git a/polly/test/Isl/Ast/rmsmd___%for.cond---%for.end6.jscop b/polly/test/Isl/Ast/rmsmd___%for.cond---%for.end6.jscop new file mode 100644 index 00000000000..8a8678aa447 --- /dev/null +++ b/polly/test/Isl/Ast/rmsmd___%for.cond---%for.end6.jscop @@ -0,0 +1,21 @@ +{ + "context" : "[n] -> { : n >= -2147483648 and n <= 2147483647 }", + "name" : "for.cond => for.end6", + "statements" : [ + { + "accesses" : [ + { + "kind" : "read", + "relation" : "[n] -> { Stmt_for_body3[i0, i1] -> MemRef_A[i0] }" + }, + { + "kind" : "write", + "relation" : "[n] -> { Stmt_for_body3[i0, i1] -> MemRef_A[i0] }" + } + ], + "domain" : "[n] -> { 
Stmt_for_body3[i0, i1] : i0 >= 0 and i0 <= -1 + 2n and n >= 1 and i1 >= 0 and i1 <= 1023 }", + "name" : "Stmt_for_body3", + "schedule" : "[n] -> { Stmt_for_body3[i0, i1] -> scattering[0, i0, 0, i1, 0]: i0 % 2 = 0; Stmt_for_body3[i0, i1] -> scattering[1, i0, 0, i1, 0]: i0 % 2 = 1 }" + } + ] +} diff --git a/polly/test/Isl/CodeGen/reduction_simple_binary.ll b/polly/test/Isl/CodeGen/reduction_simple_binary.ll new file mode 100644 index 00000000000..e9bc1f19eb6 --- /dev/null +++ b/polly/test/Isl/CodeGen/reduction_simple_binary.ll @@ -0,0 +1,38 @@ +; RUN: opt %loadPolly -polly-ast -polly-ast-detect-parallel -analyze < %s | FileCheck %s +; +; CHECK: pragma simd reduction +; CHECK: pragma omp parallel for reduction +; +; int prod; +; void f() { +; for (int i = 0; i < 100; i++) +; prod *= i; +; } +; +target datalayout = "e-m:e-p:32:32-i64:64-v128:64:128-n32-S64" + +@prod = common global i32 0, align 4 + +define void @f() { +entry: + br label %for.cond + +for.cond: ; preds = %for.inc, %entry + %i1.0 = phi i32 [ 0, %entry ], [ %inc, %for.inc ] + %exitcond = icmp ne i32 %i1.0, 100 + br i1 %exitcond, label %for.body, label %for.end + +for.body: ; preds = %for.cond + %add2 = add nsw i32 %i1.0, 3 + %tmp1 = load i32* @prod, align 4 + %mul3 = mul nsw i32 %tmp1, %add2 + store i32 %mul3, i32* @prod, align 4 + br label %for.inc + +for.inc: ; preds = %for.body + %inc = add nsw i32 %i1.0, 1 + br label %for.cond + +for.end: ; preds = %for.cond + ret void +} |