22 files changed, 1307 insertions, 26 deletions
diff --git a/polly/include/polly/ScopInfo.h b/polly/include/polly/ScopInfo.h
index b91713a6e8b..31f08402e32 100644
--- a/polly/include/polly/ScopInfo.h
+++ b/polly/include/polly/ScopInfo.h
@@ -324,7 +324,14 @@ class ScopStmt {
   __isl_give isl_set *buildDomain(TempScop &tempScop, const Region &CurRegion);
   void buildScattering(SmallVectorImpl<unsigned> &Scatter);
   void buildAccesses(TempScop &tempScop, const Region &CurRegion);
-  void checkForReduction();
+
+  /// @brief Detect and mark reductions in the ScopStmt
+  void checkForReductions();
+
+  /// @brief Collect loads which might form a reduction chain with @p StoreMA
+  void
+  collectCandiateReductionLoads(MemoryAccess *StoreMA,
+                                llvm::SmallVectorImpl<MemoryAccess *> &Loads);
   //@}
 
   /// Create the ScopStmt from a BasicBlock.
diff --git a/polly/lib/Analysis/Dependences.cpp b/polly/lib/Analysis/Dependences.cpp
index 0390409ae3c..a9ae4b3d75a 100644
--- a/polly/lib/Analysis/Dependences.cpp
+++ b/polly/lib/Analysis/Dependences.cpp
@@ -308,7 +308,6 @@ void Dependences::calculateDependences(Scop &S) {
       isl_map *Identity =
           isl_map_from_domain_and_range(isl_set_copy(AccDomW), AccDomW);
       RED = isl_union_map_add_map(RED, Identity);
-      break;
     }
   }
 
diff --git a/polly/lib/Analysis/ScopInfo.cpp b/polly/lib/Analysis/ScopInfo.cpp
index d2c8fbf876d..1521b3bf147 100644
--- a/polly/lib/Analysis/ScopInfo.cpp
+++ b/polly/lib/Analysis/ScopInfo.cpp
@@ -706,34 +706,36 @@ ScopStmt::ScopStmt(Scop &parent, TempScop &tempScop, const Region &CurRegion,
   Domain = buildDomain(tempScop, CurRegion);
   buildScattering(Scatter);
   buildAccesses(tempScop, CurRegion);
-  checkForReduction();
-}
-
-void ScopStmt::checkForReduction() {
-  // Skip statements with more than one binary reduction
-  if (MemAccs.size() != 2)
-    return;
-
-  // Skip if there is not exactly one load and one store
-  unsigned LoadIdx = MemAccs[0]->isRead() ? 0 : 1;
-  auto *Load = dyn_cast<LoadInst>(MemAccs[LoadIdx]->getAccessInstruction());
-  auto *Store =
-      dyn_cast<StoreInst>(MemAccs[1 - LoadIdx]->getAccessInstruction());
-  if (!Load || !Store ||
-      Load->getPointerOperand() != Store->getPointerOperand())
+  checkForReductions();
+}
+
+/// @brief Collect loads which might form a reduction chain with @p StoreMA
+///
+/// Check if the stored value for @p StoreMA is a binary operator with one or
+/// two loads as operands. If the binary operand is commutative & associative,
+/// used only once (by @p StoreMA) and its load operands are also used only
+/// once, we have found a possible reduction chain. It starts at an operand
+/// load and includes the binary operator and @p StoreMA.
+///
+/// Note: We allow only one use to ensure the load and binary operator cannot
+///       escape this block or into any other store except @p StoreMA.
+void ScopStmt::collectCandiateReductionLoads(
+    MemoryAccess *StoreMA, SmallVectorImpl<MemoryAccess *> &Loads) {
+  auto *Store = dyn_cast<StoreInst>(StoreMA->getAccessInstruction());
+  if (!Store)
     return;
 
   // Skip if there is not one binary operator between the load and the store
   auto *BinOp = dyn_cast<BinaryOperator>(Store->getValueOperand());
-  if (!BinOp || (BinOp->getOperand(0) != Load && BinOp->getOperand(1) != Load))
+  if (!BinOp)
     return;
 
-  // Skip if the opcode of the binary operator is not commutative/associative
-  if (!BinOp->isCommutative() || !BinOp->isAssociative())
+  // Skip if the binary operators has multiple uses
+  if (BinOp->getNumUses() != 1)
     return;
 
-  // Skip if the load has multiple uses
-  if (Load->getNumUses() != 1)
+  // Skip if the opcode of the binary operator is not commutative/associative
+  if (!BinOp->isCommutative() || !BinOp->isAssociative())
     return;
 
   // Skip if it is a multiplicative reduction and we disabled them
@@ -742,9 +744,87 @@ void ScopStmt::checkForReduction() {
        BinOp->getOpcode() == Instruction::FMul))
     return;
 
-  // Valid reduction like access
-  MemAccs[0]->markReductionLike();
-  MemAccs[1]->markReductionLike();
+  // Check the binary operator operands for a candidate load
+  auto *PossibleLoad0 = dyn_cast<LoadInst>(BinOp->getOperand(0));
+  auto *PossibleLoad1 = dyn_cast<LoadInst>(BinOp->getOperand(1));
+  if (!PossibleLoad0 && !PossibleLoad1)
+    return;
+
+  // A load is only a candidate if it cannot escape (thus has only this use)
+  if (PossibleLoad0 && PossibleLoad0->getNumUses() == 1)
+    Loads.push_back(lookupAccessFor(PossibleLoad0));
+  if (PossibleLoad1 && PossibleLoad1->getNumUses() == 1)
+    Loads.push_back(lookupAccessFor(PossibleLoad1));
+}
+
+/// @brief Check for reductions in this ScopStmt
+///
+/// Iterate over all store memory accesses and check for valid binary reduction
+/// like chains. For all candidates we check if they have the same base address
+/// and there are no other accesses which overlap with them. The base address
+/// check rules out impossible reductions candidates early. The overlap check,
+/// together with the "only one user" check in collectCandiateReductionLoads,
+/// guarantees that none of the intermediate results will escape during
+/// execution of the loop nest. We basically check here that no other memory
+/// access can access the same memory as the potential reduction.
+void ScopStmt::checkForReductions() {
+  SmallVector<MemoryAccess *, 2> Loads;
+  SmallVector<std::pair<MemoryAccess *, MemoryAccess *>, 4> Candidates;
+
+  // First collect candidate load-store reduction chains by iterating over all
+  // stores and collecting possible reduction loads.
+  for (MemoryAccess *StoreMA : MemAccs) {
+    if (StoreMA->isRead())
+      continue;
+
+    Loads.clear();
+    collectCandiateReductionLoads(StoreMA, Loads);
+    for (MemoryAccess *LoadMA : Loads)
+      Candidates.push_back(std::make_pair(LoadMA, StoreMA));
+  }
+
+  // Then check each possible candidate pair.
+  for (const auto &CandidatePair : Candidates) {
+    bool Valid = true;
+    isl_map *LoadAccs = CandidatePair.first->getAccessRelation();
+    isl_map *StoreAccs = CandidatePair.second->getAccessRelation();
+
+    // Skip those with obviously unequal base addresses.
+    if (!isl_map_has_equal_space(LoadAccs, StoreAccs)) {
+      isl_map_free(LoadAccs);
+      isl_map_free(StoreAccs);
+      continue;
+    }
+
+    // And check if the remaining for overlap with other memory accesses.
+    isl_map *AllAccsRel = isl_map_union(LoadAccs, StoreAccs);
+    AllAccsRel = isl_map_intersect_domain(AllAccsRel, getDomain());
+    isl_set *AllAccs = isl_map_range(AllAccsRel);
+
+    for (MemoryAccess *MA : MemAccs) {
+      if (MA == CandidatePair.first || MA == CandidatePair.second)
+        continue;
+
+      isl_map *AccRel =
+          isl_map_intersect_domain(MA->getAccessRelation(), getDomain());
+      isl_set *Accs = isl_map_range(AccRel);
+
+      if (isl_set_has_equal_space(AllAccs, Accs) || isl_set_free(Accs)) {
+        isl_set *OverlapAccs = isl_set_intersect(Accs, isl_set_copy(AllAccs));
+        Valid = Valid && isl_set_is_empty(OverlapAccs);
+        isl_set_free(OverlapAccs);
+      }
+    }
+
+    isl_set_free(AllAccs);
+    if (!Valid)
+      continue;
+
+    // If no overlapping access was found we mark the load and store as
+    // reduction like.
+    CandidatePair.first->markReductionLike();
+    CandidatePair.second->markReductionLike();
+  }
 }
 
 std::string ScopStmt::getDomainStr() const { return stringFromIslObj(Domain); }
diff --git a/polly/test/Dependences/reduction_complex_location.ll b/polly/test/Dependences/reduction_complex_location.ll
new file mode 100644
index 00000000000..8ed46b7aaba
--- /dev/null
+++ b/polly/test/Dependences/reduction_complex_location.ll
@@ -0,0 +1,59 @@
+; RUN: opt -basicaa %loadPolly -polly-dependences -analyze < %s | FileCheck %s
+;
+; CHECK: RAW dependences:
+; CHECK:   {  }
+; CHECK: WAR dependences:
+; CHECK:   {  }
+; CHECK: WAW dependences:
+; CHECK:   {  }
+; CHECK: Reduction dependences:
+; CHECK:   { Stmt_for_body3[i0, i1] -> Stmt_for_body3[2 + i0, -1 + i1] : i0 <= 97 and i0 >= 0 and i1 <= 99 and i1 >= 1 }
+;
+; void f(int *sum) {
+;   for (int i = 0; i < 100; i++)
+;     for (int j = 0; j < 100; j++)
+;       sum[i+j*2] += j * i;
+; }
+target datalayout = "e-m:e-p:32:32-i64:64-v128:64:128-n32-S64"
+
+define void @f(i32* %sum) {
+entry:
+  br label %for.cond
+
+for.cond:                                         ; preds = %for.inc6, %entry
+  %i.0 = phi i32 [ 0, %entry ], [ %inc7, %for.inc6 ]
+  %exitcond1 = icmp ne i32 %i.0, 100
+  br i1 %exitcond1, label %for.body, label %for.end8
+
+for.body:                                         ; preds = %for.cond
+  br label %for.cond1
+
+for.cond1:                                        ; preds = %for.inc, %for.body
+  %j.0 = phi i32 [ 0, %for.body ], [ %inc, %for.inc ]
+  %exitcond = icmp ne i32 %j.0, 100
+  br i1 %exitcond, label %for.body3, label %for.end
+
+for.body3:                                        ; preds = %for.cond1
+  %mul = mul nsw i32 %j.0, %i.0
+  %mul4 = shl nsw i32 %j.0, 1
+  %add = add nsw i32 %i.0, %mul4
+  %arrayidx = getelementptr inbounds i32* %sum, i32 %add
+  %tmp = load i32* %arrayidx, align 4
+  %add5 = add nsw i32 %tmp, %mul
+  store i32 %add5, i32* %arrayidx, align 4
+  br label %for.inc
+
+for.inc:                                          ; preds = %for.body3
+  %inc = add nsw i32 %j.0, 1
+  br label %for.cond1
+
+for.end:                                          ; preds = %for.cond1
+  br label %for.inc6
+
+for.inc6:                                         ; preds = %for.end
+  %inc7 = add nsw i32 %i.0, 1
+  br label %for.cond
+
+for.end8:                                         ; preds = %for.cond
+  ret void
+}
diff --git a/polly/test/Dependences/reduction_mixed_reduction_and_non_reduction_dependences.ll b/polly/test/Dependences/reduction_mixed_reduction_and_non_reduction_dependences.ll
new file mode 100644
index 00000000000..248d7b8c361
--- /dev/null
+++ b/polly/test/Dependences/reduction_mixed_reduction_and_non_reduction_dependences.ll
@@ -0,0 +1,60 @@
+; RUN: opt %loadPolly -polly-dependences -analyze < %s | FileCheck %s
+;
+; CHECK: RAW dependences:
+; CHECK-DAG: Stmt_for_body3[i0, 1] -> Stmt_for_body3[1 + i0, o1] : i0 <= 1022 and i0 >= 0 and o1 <= 511 and o1 >= 1
+; CHECK-DAG: Stmt_for_body3[i0, 0] -> Stmt_for_body3[i0, o1] : i0 <= 1023 and i0 >= 0 and o1 <= 511 and o1 >= 1
+; CHECK: WAR dependences:
+; CHECK:   {  }
+; CHECK: WAW dependences:
+; CHECK-DAG: Stmt_for_body3[i0, i1] -> Stmt_for_body3[1 + i0, -1 + i1] : i0 <= 1022 and i0 >= 0 and i1 <= 511 and i1 >= 2
+; CHECK-DAG: Stmt_for_body3[i0, 2] -> Stmt_for_body3[2 + i0, 0] : i0 <= 1021 and i0 >= 0
+; CHECK: Reduction dependences:
+; CHECK:   { Stmt_for_body3[i0, 1] -> Stmt_for_body3[1 + i0, 0] : i0 >= 0 and i0 <= 1022 }
+;
+; void f(int *sum) {
+;   for (int i = 0; i < 1024; i++)
+;     for (int j = 0; j < 512; j++)
+;       sum[i + j] = sum[i] + 3;
+; }
+target datalayout = "e-m:e-p:32:32-i64:64-v128:64:128-n32-S64"
+
+define void @f(i32* %sum) {
+entry:
+  br label %for.cond
+
+for.cond:                                         ; preds = %for.inc6, %entry
+  %i.0 = phi i32 [ 0, %entry ], [ %inc7, %for.inc6 ]
+  %exitcond1 = icmp ne i32 %i.0, 1024
+  br i1 %exitcond1, label %for.body, label %for.end8
+
+for.body:                                         ; preds = %for.cond
+  br label %for.cond1
+
+for.cond1:                                        ; preds = %for.inc, %for.body
+  %j.0 = phi i32 [ 0, %for.body ], [ %inc, %for.inc ]
+  %exitcond = icmp ne i32 %j.0, 512
+  br i1 %exitcond, label %for.body3, label %for.end
+
+for.body3:                                        ; preds = %for.cond1
+  %arrayidx = getelementptr inbounds i32* %sum, i32 %i.0
+  %tmp = load i32* %arrayidx, align 4
+  %add = add nsw i32 %tmp, 3
+  %add4 = add nsw i32 %i.0, %j.0
+  %arrayidx5 = getelementptr inbounds i32* %sum, i32 %add4
+  store i32 %add, i32* %arrayidx5, align 4
+  br label %for.inc
+
+for.inc:                                          ; preds = %for.body3
+  %inc = add nsw i32 %j.0, 1
+  br label %for.cond1
+
+for.end:                                          ; preds = %for.cond1
+  br label %for.inc6
+
+for.inc6:                                         ; preds = %for.end
+  %inc7 = add nsw i32 %i.0, 1
+  br label %for.cond
+
+for.end8:                                         ; preds = %for.cond
+  ret void
+}
diff --git a/polly/test/Dependences/reduction_multiple_loops_array_sum.ll b/polly/test/Dependences/reduction_multiple_loops_array_sum.ll
new file mode 100644
index 00000000000..20903fbfac7
--- /dev/null
+++ b/polly/test/Dependences/reduction_multiple_loops_array_sum.ll
@@ -0,0 +1,77 @@
+; RUN: opt -basicaa %loadPolly -polly-dependences -analyze < %s | FileCheck %s
+;
+; Verify that only the inner reduction like accesses cause reduction dependences
+;
+; CHECK: Reduction dependences:
+; CHECK:   { Stmt_for_body3[i0, i1] -> Stmt_for_body3[i0, 1 + i1] : i0 <= 99 and i0 >= 0 and i1 <= 98 and i1 >= 0 }
+;
+; void f(int * restrict A, int * restrict sum) {
+;   int i, j, k;
+;   for (i = 0; i < 100; i++) {
+;     *sum *= 7;
+;     for (j = 0; j < 100; j++) {
+;       *sum += A[i+j];
+;       for (k = 0; k< 100; k++) {}
+;     }
+;   }
+; }
+target datalayout = "e-m:e-p:32:32-i64:64-v128:64:128-n32-S64"
+
+define void @f(i32* noalias %A, i32* noalias %sum) {
+entry:
+  br label %for.cond
+
+for.cond:                                         ; preds = %for.inc11, %entry
+  %i.0 = phi i32 [ 0, %entry ], [ %inc12, %for.inc11 ]
+  %exitcond2 = icmp ne i32 %i.0, 100
+  br i1 %exitcond2, label %for.body, label %for.end13
+
+for.body:                                         ; preds = %for.cond
+  %tmp = load i32* %sum, align 4
+  %mul = mul nsw i32 %tmp, 7
+  store i32 %mul, i32* %sum, align 4
+  br label %for.cond1
+
+for.cond1:                                        ; preds = %for.inc8, %for.body
+  %j.0 = phi i32 [ 0, %for.body ], [ %inc9, %for.inc8 ]
+  %exitcond1 = icmp ne i32 %j.0, 100
+  br i1 %exitcond1, label %for.body3, label %for.end10
+
+for.body3:                                        ; preds = %for.cond1
+  %add = add nsw i32 %i.0, %j.0
+  %arrayidx = getelementptr inbounds i32* %A, i32 %add
+  %tmp3 = load i32* %arrayidx, align 4
+  %tmp4 = load i32* %sum, align 4
+  %add4 = add nsw i32 %tmp4, %tmp3
+  store i32 %add4, i32* %sum, align 4
+  br label %for.cond5
+
+for.cond5:                                        ; preds = %for.inc, %for.body3
+  %k.0 = phi i32 [ 0, %for.body3 ], [ %inc, %for.inc ]
+  %exitcond = icmp ne i32 %k.0, 100
+  br i1 %exitcond, label %for.body7, label %for.end
+
+for.body7:                                        ; preds = %for.cond5
+  br label %for.inc
+
+for.inc:                                          ; preds = %for.body7
+  %inc = add nsw i32 %k.0, 1
+  br label %for.cond5
+
+for.end:                                          ; preds = %for.cond5
+  br label %for.inc8
+
+for.inc8:                                         ; preds = %for.end
+  %inc9 = add nsw i32 %j.0, 1
+  br label %for.cond1
+
+for.end10:                                        ; preds = %for.cond1
+  br label %for.inc11
+
+for.inc11:                                        ; preds = %for.end10
+  %inc12 = add nsw i32 %i.0, 1
+  br label %for.cond
+
+for.end13:                                        ; preds = %for.cond
+  ret void
+}
diff --git a/polly/test/Dependences/reduction_multiple_loops_array_sum_2.ll b/polly/test/Dependences/reduction_multiple_loops_array_sum_2.ll
new file mode 100644
index 00000000000..33982921f20
--- /dev/null
+++ b/polly/test/Dependences/reduction_multiple_loops_array_sum_2.ll
@@ -0,0 +1,79 @@
+; RUN: opt %loadPolly -polly-dependences -analyze -basicaa < %s | FileCheck %s
+;
+; CHECK: RAW dependences:
+; CHECK:   {  }
+; CHECK: WAR dependences:
+; CHECK:   {  }
+; CHECK: WAW dependences:
+; CHECK:   {  }
+; CHECK: Reduction dependences:
+; CHECK-DAG: Stmt_for_body3[i0, i1] -> Stmt_for_body3[i0, 1 + i1] : i0 <= 99 and i0 >= 0 and i1 <= 98 and i1 >= 0
+; CHECK-DAG: Stmt_for_body3[i0, 99] -> Stmt_for_body3[1 + i0, 0] : i0 <= 98 and i0 >= 0
+;
+; int f(int * restrict A, int * restrict sum) {
+;   int i, j, k;
+;   for (i = 0; i < 100; i++) {
+;     for (j = 0; j < 100; j++) {
+;       sum += A[i+j];
+;       for (k = 0; k< 100; k++) {}
+;     }
+;   }
+;   return sum;
+; }
+target datalayout = "e-m:e-p:32:32-i64:64-v128:64:128-n32-S64"
+
+define void @f(i32* noalias %A, i32* noalias %sum) {
+entry:
+  br label %for.cond
+
+for.cond:                                         ; preds = %for.inc11, %entry
+  %i.0 = phi i32 [ 0, %entry ], [ %inc12, %for.inc11 ]
+  %exitcond2 = icmp ne i32 %i.0, 100
+  br i1 %exitcond2, label %for.body, label %for.end13
+
+for.body:                                         ; preds = %for.cond
+  br label %for.cond1
+
+for.cond1:                                        ; preds = %for.inc8, %for.body
+  %j.0 = phi i32 [ 0, %for.body ], [ %inc9, %for.inc8 ]
+  %exitcond1 = icmp ne i32 %j.0, 100
+  br i1 %exitcond1, label %for.body3, label %for.end10
+
+for.body3:                                        ; preds = %for.cond1
+  %add = add nsw i32 %i.0, %j.0
+  %arrayidx = getelementptr inbounds i32* %A, i32 %add
+  %tmp3 = load i32* %arrayidx, align 4
+  %tmp4 = load i32* %sum, align 4
+  %add4 = add nsw i32 %tmp4, %tmp3
+  store i32 %add4, i32* %sum, align 4
+  br label %for.cond5
+
+for.cond5:                                        ; preds = %for.inc, %for.body3
+  %k.0 = phi i32 [ 0, %for.body3 ], [ %inc, %for.inc ]
+  %exitcond = icmp ne i32 %k.0, 100
+  br i1 %exitcond, label %for.body7, label %for.end
+
+for.body7:                                        ; preds = %for.cond5
+  br label %for.inc
+
+for.inc:                                          ; preds = %for.body7
+  %inc = add nsw i32 %k.0, 1
+  br label %for.cond5
+
+for.end:                                          ; preds = %for.cond5
+  br label %for.inc8
+
+for.inc8:                                         ; preds = %for.end
+  %inc9 = add nsw i32 %j.0, 1
+  br label %for.cond1
+
+for.end10:                                        ; preds = %for.cond1
+  br label %for.inc11
+
+for.inc11:                                        ; preds = %for.end10
+  %inc12 = add nsw i32 %i.0, 1
+  br label %for.cond
+
+for.end13:                                        ; preds = %for.cond
+  ret void
+}
diff --git a/polly/test/Dependences/reduction_multiple_loops_array_sum_3.ll b/polly/test/Dependences/reduction_multiple_loops_array_sum_3.ll
new file mode 100644
index 00000000000..4b699a1ea05
--- /dev/null
+++ b/polly/test/Dependences/reduction_multiple_loops_array_sum_3.ll
@@ -0,0 +1,73 @@
+; RUN: opt %loadPolly -polly-dependences -analyze -basicaa < %s | FileCheck %s
+;
+; CHECK: Reduction dependences:
+; CHECK:   { Stmt_for_inc[i0, i1] -> Stmt_for_inc[i0, 1 + i1] : i0 <= 99 and i0 >= 0 and i1 <= 98 and i1 >= 0 }
+;
+; int f(int * __restrict__ A) {
+;   int i, j, sum = 0;
+;   for (k = 0; k < 37; k = g(k)) {
+;     for (i = 0; i < 100; i++) {
+;       sum *= 2;
+;       for (j = 0; j < 100; j++) {
+;         sum += A[i+j];
+;       }
+;     }
+;   }
+;   return sum;
+; }
+target datalayout = "e-m:e-i64:64-f80:128-n8:16:32:64-S128"
+
+declare i64 @g(i64)
+
+define i32 @f(i32* noalias %A) {
+entry:
+  %sum.04.reg2mem = alloca i32
+  %sum.12.reg2mem = alloca i32
+  br label %entry.split
+
+entry.split:                                      ; preds = %entry
+  store i32 0, i32* %sum.04.reg2mem
+  br label %for.body_outer_split
+
+for.body_outer_split:                             ; preds = %entry.split, %for.inc5
+  %indvars.ivK = phi i64 [ 0, %entry.split ], [ %incK, %for.bos2 ]
+  br label %for.body_outer
+
+for.body_outer:                                   ; preds = %for.body_outer_split
+  %incK = call i64 @g(i64 %indvars.ivK)
+  %exitcondK = icmp eq i64 %incK, 100
+  br i1 %exitcondK, label %for.end7, label %for.body
+
+for.body:                                         ; preds = %for.inc5, %for.body_outer
+  %indvars.iv23 = phi i64 [ 0, %for.body_outer ], [ %3, %for.inc5 ]
+  %sum.04.reload = load i32* %sum.04.reg2mem
+  %mul = shl nsw i32 %sum.04.reload, 1
+  store i32 %mul, i32* %sum.12.reg2mem
+  br label %for.inc
+
+for.inc:                                          ; preds = %for.inc, %for.body
+  %indvars.iv1 = phi i64 [ 0, %for.body ], [ %1, %for.inc ]
+  %sum.12.reload = load i32* %sum.12.reg2mem
+  %0 = add i64 %indvars.iv23, %indvars.iv1
+  %arrayidx = getelementptr i32* %A, i64 %0
+  %tmp5 = load i32* %arrayidx, align 4
+  %add4 = add nsw i32 %tmp5, %sum.12.reload
+  %1 = add nuw nsw i64 %indvars.iv1, 1
+  %exitcond1 = icmp eq i64 %1, 100
+  store i32 %add4, i32* %sum.12.reg2mem
+  br i1 %exitcond1, label %for.inc5, label %for.inc
+
+for.inc5:                                         ; preds = %for.inc
+  %2 = load i32* %sum.12.reg2mem
+  %3 = add nuw nsw i64 %indvars.iv23, 1
+  %exitcond2 = icmp eq i64 %3, 100
+  store i32 %2, i32* %sum.04.reg2mem
+  br i1 %exitcond2, label %for.bos2, label %for.body
+
+for.bos2:
+  br label %for.body_outer_split
+
+for.end7:                                         ; preds = %for.inc5
+  %4 = load i32* %sum.04.reg2mem
+  ret i32 %4
+}
diff --git a/polly/test/Dependences/reduction_partially_escaping_intermediate_in_other_stmt.ll b/polly/test/Dependences/reduction_partially_escaping_intermediate_in_other_stmt.ll
new file mode 100644
index 00000000000..c48d8f07ecc
--- /dev/null
+++ b/polly/test/Dependences/reduction_partially_escaping_intermediate_in_other_stmt.ll
@@ -0,0 +1,69 @@
+; RUN: opt %loadPolly -polly-dependences -analyze -basicaa < %s | FileCheck %s
+;
+; CHECK: Reduction dependences:
+; CHECK:   [N] -> { Stmt_for_body3[i0, i1] -> Stmt_for_body3[i0, 1 + i1] : i0 <= 1023 and i0 >= 0 and i1 <= 1022 and i1 >= 0 and i1 >= 1024 - N + i0 }
+;
+; void f(int N, int * restrict sums, int * restrict escape) {
+;   for (int i = 0; i < 1024; i++) {
+;     for (int j = 0; j < 1024; j++) {
+;       sums[i] += 5;
+;       if (N - i + j < 1024)
+;         escape[N - i + j] = sums[i];
+;     }
+;   }
+; }
+target datalayout = "e-m:e-p:32:32-i64:64-v128:64:128-n32-S64"
+
+define void @f(i32 %N, i32* noalias %sums, i32* noalias %escape) {
+entry:
+  br label %for.cond
+
+for.cond:                                         ; preds = %for.inc10, %entry
+  %i.0 = phi i32 [ 0, %entry ], [ %inc11, %for.inc10 ]
+  %exitcond1 = icmp ne i32 %i.0, 1024
+  br i1 %exitcond1, label %for.body, label %for.end12
+
+for.body:                                         ; preds = %for.cond
+  br label %for.cond1
+
+for.cond1:                                        ; preds = %for.inc, %for.body
+  %j.0 = phi i32 [ 0, %for.body ], [ %inc, %for.inc ]
+  %exitcond = icmp ne i32 %j.0, 1024
+  br i1 %exitcond, label %for.body3, label %for.end
+
+for.body3:                                        ; preds = %for.cond1
+  %arrayidx = getelementptr inbounds i32* %sums, i32 %i.0
+  %tmp = load i32* %arrayidx, align 4
+  %add = add nsw i32 %tmp, 5
+  store i32 %add, i32* %arrayidx, align 4
+  %sub = sub nsw i32 %N, %i.0
+  %add4 = add nsw i32 %sub, %j.0
+  %cmp5 = icmp slt i32 %add4, 1024
+  br i1 %cmp5, label %if.then, label %if.end
+
+if.then:                                          ; preds = %for.body3
+  %arrayidx6 = getelementptr inbounds i32* %sums, i32 %i.0
+  %tmp2 = load i32* %arrayidx6, align 4
+  %sub7 = sub nsw i32 %N, %i.0
+  %add8 = add nsw i32 %sub7, %j.0
+  %arrayidx9 = getelementptr inbounds i32* %escape, i32 %add8
+  store i32 %tmp2, i32* %arrayidx9, align 4
+  br label %if.end
+
+if.end:                                           ; preds = %if.then, %for.body3
+  br label %for.inc
+
+for.inc:                                          ; preds = %if.end
+  %inc = add nsw i32 %j.0, 1
+  br label %for.cond1
+
+for.end:                                          ; preds = %for.cond1
+  br label %for.inc10
+
+for.inc10:                                        ; preds = %for.end
+  %inc11 = add nsw i32 %i.0, 1
+  br label %for.cond
+
+for.end12:                                        ; preds = %for.cond
+  ret void
+}
diff --git a/polly/test/Dependences/reduction_two_reductions_different_rloops.ll b/polly/test/Dependences/reduction_two_reductions_different_rloops.ll
new file mode 100644
index 00000000000..4470e4e0484
--- /dev/null
+++ b/polly/test/Dependences/reduction_two_reductions_different_rloops.ll
@@ -0,0 +1,73 @@
+; RUN: opt %loadPolly -basicaa -polly-dependences -analyze < %s | FileCheck %s
+;
+; CHECK: RAW dependences:
+; CHECK:   {  }
+; CHECK: WAR dependences:
+; CHECK:   {  }
+; CHECK: WAW dependences:
+; CHECK:   {  }
+; CHECK: Reduction dependences:
+; CHECK-DAG: Stmt_for_body3[i0, i1] -> Stmt_for_body3[i0, 1 + i1] : i0 <= 1023 and i0 >= 0 and i1 <= 1022 and i1 >= 0
+; CHECK-DAG: Stmt_for_body3[i0, i1] -> Stmt_for_body3[1 + i0, i1] : i0 <= 1022 and i0 >= 0 and i1 <= 1023 and i1 >= 0
+;
+; void f(int *restrict A, int *restrict B, int *restrict Values) {
+;   for (int i = 0; i < 1024; i++) {
+;     for (int j = 0; j < 1024; j++) {
+;       A[i] += Values[i + j - 1];
+;       B[j] += Values[i + j + 42];
+;     }
+;   }
+; }
+;
+target datalayout = "e-m:e-p:32:32-i64:64-v128:64:128-n32-S64"
+
+define void @f(i32* noalias %A, i32* noalias %B, i32* noalias %Values)  {
+entry:
+  br label %for.cond
+
+for.cond:                                         ; preds = %for.inc11, %entry
+  %i.0 = phi i32 [ 0, %entry ], [ %inc12, %for.inc11 ]
+  %exitcond1 = icmp ne i32 %i.0, 1024
+  br i1 %exitcond1, label %for.body, label %for.end13
+
+for.body:                                         ; preds = %for.cond
+  br label %for.cond1
+
+for.cond1:                                        ; preds = %for.inc, %for.body
+  %j.0 = phi i32 [ 0, %for.body ], [ %inc, %for.inc ]
+  %exitcond = icmp ne i32 %j.0, 1024
+  br i1 %exitcond, label %for.body3, label %for.end
+
+for.body3:                                        ; preds = %for.cond1
+  %add = add nsw i32 %i.0, %j.0
+  %sub = add nsw i32 %add, -1
+  %arrayidx = getelementptr inbounds i32* %Values, i32 %sub
+  %tmp = load i32* %arrayidx, align 4
+  %arrayidx4 = getelementptr inbounds i32* %A, i32 %i.0
+  %tmp2 = load i32* %arrayidx4, align 4
+  %add5 = add nsw i32 %tmp2, %tmp
+  store i32 %add5, i32* %arrayidx4, align 4
+  %add6 = add nsw i32 %i.0, %j.0
+  %add7 = add nsw i32 %add6, 42
+  %arrayidx8 = getelementptr inbounds i32* %Values, i32 %add7
+  %tmp3 = load i32* %arrayidx8, align 4
+  %arrayidx9 = getelementptr inbounds i32* %B, i32 %j.0
+  %tmp4 = load i32* %arrayidx9, align 4
+  %add10 = add nsw i32 %tmp4, %tmp3
+  store i32 %add10, i32* %arrayidx9, align 4
+  br label %for.inc
+
+for.inc:                                          ; preds = %for.body3
+  %inc = add nsw i32 %j.0, 1
+  br label %for.cond1
+
+for.end:                                          ; preds = %for.cond1
+  br label %for.inc11
+
+for.inc11:                                        ; preds = %for.end
+  %inc12 = add nsw i32 %i.0, 1
+  br label %for.cond
+
+for.end13:                                        ; preds = %for.cond
+  ret void
+}
diff --git a/polly/test/ScopInfo/reduction_alternating_base.ll b/polly/test/ScopInfo/reduction_alternating_base.ll
new file mode 100644
index 00000000000..ce98f01c4aa
--- /dev/null
+++ b/polly/test/ScopInfo/reduction_alternating_base.ll
@@ -0,0 +1,38 @@
+; RUN: opt %loadPolly -polly-scops -analyze < %s | FileCheck %s
+;
+; FIXME: We cannot detect this SCoP yet but as soon as we can we should check
+;        that the reduction is detected!
+;
+; CHECK-NOT: Scattering
+;
+;    void f(int *A) {
+;      for (int i = 0; i < 1024; i++)
+;        A[i % 2] += i;
+;    }
+;
+target datalayout = "e-m:e-p:32:32-i64:64-v128:64:128-n32-S64"
+
+define void @f(i32* %A) {
+entry:
+  br label %for.cond
+
+for.cond:                                         ; preds = %for.inc, %entry
+  %i.0 = phi i32 [ 0, %entry ], [ %inc, %for.inc ]
+  %exitcond = icmp ne i32 %i.0, 1024
+  br i1 %exitcond, label %for.body, label %for.end
+
+for.body:                                         ; preds = %for.cond
+  %rem = srem i32 %i.0, 2
+  %arrayidx = getelementptr inbounds i32* %A, i32 %rem
+  %tmp = load i32* %arrayidx, align 4
+  %add = add nsw i32 %tmp, %i.0
+  store i32 %add, i32* %arrayidx, align 4
+  br label %for.inc
+
+for.inc:                                          ; preds = %for.body
+  %inc = add nsw i32 %i.0, 1
+  br label %for.cond
+
+for.end:                                          ; preds = %for.cond
+  ret void
+}
diff --git a/polly/test/ScopInfo/reduction_disabled_multiplicative.ll b/polly/test/ScopInfo/reduction_disabled_multiplicative.ll
new file mode 100644
index 00000000000..159e7f53305
--- /dev/null
+++ b/polly/test/ScopInfo/reduction_disabled_multiplicative.ll
@@ -0,0 +1,52 @@
+; RUN: opt -basicaa %loadPolly -polly-scops -analyze -polly-disable-multiplicative-reductions < %s | FileCheck %s
+;
+; CHECK: ReadAccess :=       [Reduction like: 1]
+; CHECK:     { Stmt_for_body[i0] -> MemRef_sum[0] };
+; CHECK: MustWriteAccess :=  [Reduction like: 1]
+; CHECK:     { Stmt_for_body[i0] -> MemRef_sum[0] };
+; CHECK: ReadAccess :=       [Reduction like: 0]
+; CHECK:     { Stmt_for_body[i0] -> MemRef_prod[0] };
+; CHECK: MustWriteAccess :=  [Reduction like: 0]
+; CHECK:     { Stmt_for_body[i0] -> MemRef_prod[0] };
+;
+; int sum, prod;
+; 
+; void f() {
+;   int i;
+;   for (int i = 0; i < 100; i++) {
+;     sum += i * 3;
+;     prod *= (i + 3);
+;   }
+; }
+target datalayout = "e-m:e-p:32:32-i64:64-v128:64:128-n32-S64"
+
+@sum = common global i32 0, align 4
+@prod = common global i32 0, align 4
+
+define void @f() #0 {
+entry:
+  br label %for.cond
+
+for.cond:                                         ; preds = %for.inc, %entry
+  %i1.0 = phi i32 [ 0, %entry ], [ %inc, %for.inc ]
+  %exitcond = icmp ne i32 %i1.0, 100
+  br i1 %exitcond, label %for.body, label %for.end
+
+for.body:                                         ; preds = %for.cond
+  %mul = mul nsw i32 %i1.0, 3
+  %tmp = load i32* @sum, align 4
+  %add = add nsw i32 %tmp, %mul
+  store i32 %add, i32* @sum, align 4
+  %add2 = add nsw i32 %i1.0, 3
+  %tmp1 = load i32* @prod, align 4
+  %mul3 = mul nsw i32 %tmp1, %add2
+  store i32 %mul3, i32* @prod, align 4
+  br label %for.inc
+
+for.inc:                                          ; preds = %for.body
+  %inc = add nsw i32 %i1.0, 1
+  br label %for.cond
+
+for.end:                                          ; preds = %for.cond
+  ret void
+}
diff --git a/polly/test/ScopInfo/reduction_escaping_intermediate.ll b/polly/test/ScopInfo/reduction_escaping_intermediate.ll
new file mode 100644
index 00000000000..c5f5cc03928
--- /dev/null
+++ b/polly/test/ScopInfo/reduction_escaping_intermediate.ll
@@ -0,0 +1,62 @@
+; RUN: opt %loadPolly -basicaa -polly-scops -analyze < %s | FileCheck %s
+;
+; void f(int N, int * restrict sums, int * restrict escape) {
+;   int i, j;
+;   for (i = 0; i < 1024; i++) {
+;     for (j = 0; j < 1024; j++) {
+;       sums[i] += 5;
+;       escape[N - i + j] = sums[i];
+;     }
+;   }
+; }
+;
+; CHECK: Reduction like: 0
+; CHECK: sums
+; CHECK: Reduction like: 0
+; CHECK: sums
+; CHECK: Reduction like: 0
+; CHECK: escape
+target datalayout = "e-m:e-p:32:32-i64:64-v128:64:128-n32-S64"
+
+define void @f(i32 %N, i32* noalias %sums, i32* noalias %escape) {
+entry:
+  br label %for.cond
+
+for.cond:                                         ; preds = %for.inc7, %entry
+  %i.0 = phi i32 [ 0, %entry ], [ %inc8, %for.inc7 ]
+  %exitcond1 = icmp ne i32 %i.0, 1024
+  br i1 %exitcond1, label %for.body, label %for.end9
+
+for.body:                                         ; preds = %for.cond
+  br label %for.cond1
+
+for.cond1:                                        ; preds = %for.inc, %for.body
+  %j.0 = phi i32 [ 0, %for.body ], [ %inc, %for.inc ]
+  %exitcond = icmp ne i32 %j.0, 1024
+  br i1 %exitcond, label %for.body3, label %for.end
+
+for.body3:                                        ; preds = %for.cond1
+  %arrayidx = getelementptr inbounds i32* %sums, i32 %i.0
+  %tmp = load i32* %arrayidx, align 4
+  %add = add nsw i32 %tmp, 5
+  store i32 %add, i32* %arrayidx, align 4
+  %sub = sub nsw i32 %N, %i.0
+  %add5 = add nsw i32 %sub, %j.0
+  %arrayidx6 = getelementptr inbounds i32* %escape, i32 %add5
+  store i32 %add, i32* %arrayidx6, align 4
+  br label %for.inc
+
+for.inc:                                          ; preds = %for.body3
+  %inc = add nsw i32 %j.0, 1
+  br label %for.cond1
+
+for.end:                                          ; preds = %for.cond1
+  br label %for.inc7
+
+for.inc7:                                         ; preds = %for.end
+  %inc8 = add nsw i32 %i.0, 1
+  br label %for.cond
+
+for.end9:                                         ; preds = %for.cond
+  ret void
+}
diff --git a/polly/test/ScopInfo/reduction_escaping_intermediate_2.ll b/polly/test/ScopInfo/reduction_escaping_intermediate_2.ll
new file mode 100644
index 00000000000..ccf87f17483
--- /dev/null
+++ b/polly/test/ScopInfo/reduction_escaping_intermediate_2.ll
@@ -0,0 +1,72 @@
+; RUN: opt %loadPolly -basicaa -polly-scops -analyze < %s | FileCheck %s
+;
+; void f(int N, int * restrict sums, int * restrict escape) {
+;   int i, j;
+;   for (i = 0; i < 1024; i++) {
+;     for (j = 0; j < 1024; j++) {
+;       sums[i] += 5;
+;       escape[N-j] = escape[i] + sums[i-1];
+;     }
+;   }
+; }
+;
+; CHECK: Reduction like: 0
+; CHECK: sums
+; CHECK: Reduction like: 0
+; CHECK: sums
+; CHECK: Reduction like: 0
+; CHECK: escape
+; CHECK: Reduction like: 0
+; CHECK: sums
+; CHECK: Reduction like: 0
+; CHECK: escape
+target datalayout = "e-m:e-p:32:32-i64:64-v128:64:128-n32-S64"
+
+define void @f(i32 %N, i32* noalias %sums, i32* noalias %escape) {
+entry:
+  br label %for.cond
+
+for.cond:                                         ; preds = %for.inc10, %entry
+  %i.0 = phi i32 [ 0, %entry ], [ %inc11, %for.inc10 ]
+  %exitcond1 = icmp ne i32 %i.0, 1024
+  br i1 %exitcond1, label %for.body, label %for.end12
+
+for.body:                                         ; preds = %for.cond
+  br label %for.cond1
+
+for.cond1:                                        ; preds = %for.inc, %for.body
+  %j.0 = phi i32 [ 0, %for.body ], [ %inc, %for.inc ]
+  %exitcond = icmp ne i32 %j.0, 1024
+  br i1 %exitcond, label %for.body3, label %for.end
+
+for.body3:                                        ; preds = %for.cond1
+  %arrayidx = getelementptr inbounds i32* %sums, i32 %i.0
+  %tmp = load i32* %arrayidx, align 4
+  %add = add nsw i32 %tmp, 5
+  store i32 %add, i32* %arrayidx, align 4
+  %arrayidx4 = getelementptr inbounds i32* %escape, i32 %i.0
+  %tmp2 = load i32* %arrayidx4, align 4
+  %sub = add nsw i32 %i.0, -1
+  %arrayidx5 = getelementptr inbounds i32* %sums, i32 %sub
+  %tmp3 = load i32* %arrayidx5, align 4
+  %add6 = add nsw i32 %tmp2, %tmp3
+  %sub7 = sub nsw i32 %N, %i.0
+  %add8 = add nsw i32 %sub7, %j.0
+  %arrayidx9 = getelementptr inbounds i32* %escape, i32 %add8
+  store i32 %add6, i32* %arrayidx9, align 4
+  br label %for.inc
+
+for.inc:                                          ; preds = %for.body3
+  %inc = add nsw i32 %j.0, 1
+  br label %for.cond1
+
+for.end:                                          ; preds = %for.cond1
+  br label %for.inc10
+
+for.inc10:                                        ; preds = %for.end
+  %inc11 = add nsw i32 %i.0, 1
+  br label %for.cond
+
+for.end12:                                        ; preds = %for.cond
+  ret void
+}
diff --git a/polly/test/ScopInfo/reduction_invalid_different_operators.ll b/polly/test/ScopInfo/reduction_invalid_different_operators.ll
new file mode 100644
index 00000000000..81c02bc6717
--- /dev/null
+++ b/polly/test/ScopInfo/reduction_invalid_different_operators.ll
@@ -0,0 +1,49 @@
+; RUN: opt %loadPolly -basicaa -polly-scops -analyze < %s | FileCheck %s
+;
+; int f() {
+;   int i, sum = 0, sth = 0;
+;   for (i = 0; i < 1024; i++) {
+;     sum += 5;
+;     sth = sth + sth * sth + sth;
+;     sum *= 5;
+;   }
+;   return sum + sth;
+; }
+;
+; CHECK-NOT: Reduction like: 1
+target datalayout = "e-m:e-p:32:32-i64:64-v128:64:128-n32-S64"
+
+define i32 @f() {
+entry:
+  %sum.0 = alloca i32
+  %sth.0 = alloca i32
+  br label %entry.split
+
+entry.split:                                      ; preds = %entry
+  store i32 0, i32* %sum.0
+  store i32 0, i32* %sth.0
+  br label %for.cond
+
+for.cond:                                         ; preds = %for.cond, %entry.split
+  %i.0 = phi i32 [ 0, %entry.split ], [ %inc, %for.cond ]
+  %sth.0.reload = load i32* %sth.0
+  %sum.0.reload = load i32* %sum.0
+  %exitcond = icmp ne i32 %i.0, 1024
+  %mul = mul nsw i32 %sth.0.reload, %sth.0.reload
+  %add1 = add nsw i32 %sth.0.reload, %mul
+  %tmp = mul i32 %sum.0.reload, 5
+  store i32 %tmp, i32* %sum.0
+  %sum.1.reload = load i32* %sum.0
+  %mul3 = add i32 %sum.1.reload, 25
+  %add2 = add nsw i32 %add1, %sth.0.reload
+  %inc = add nsw i32 %i.0, 1
+  store i32 %mul3, i32* %sum.0
+  store i32 %add2, i32* %sth.0
+  br i1 %exitcond, label %for.cond, label %for.end
+
+for.end:                                          ; preds = %for.cond
+  %sum.0.reload.2 = load i32* %sum.0
+  %sth.0.reload.2 = load i32* %sth.0
+  %add4 = add nsw i32 %sum.0.reload.2, %sth.0.reload.2
+  ret i32 %add4
+}
diff --git a/polly/test/ScopInfo/reduction_invalid_overlapping_accesses.ll b/polly/test/ScopInfo/reduction_invalid_overlapping_accesses.ll
new file mode 100644
index 00000000000..a75e9de9a85
--- /dev/null
+++ b/polly/test/ScopInfo/reduction_invalid_overlapping_accesses.ll
@@ -0,0 +1,58 @@
+; RUN: opt %loadPolly -polly-scops -analyze < %s | FileCheck %s
+;
+; void f(int *sums) {
+;   int i, j;
+;   for (i = 0; i < 1024; i++) {
+;     for (j = 0; j < 1024; j++) {
+;       sums[i] += 5;
+;       sums[i+10] *= 5;
+;     }
+;   }
+; }
+;
+; CHECK-NOT: Reduction like: 1
+target datalayout = "e-m:e-p:32:32-i64:64-v128:64:128-n32-S64"
+
+define void @f(i32* %sums) {
+entry:
+  br label %for.cond
+
+for.cond:                                         ; preds = %for.inc6, %entry
+  %i.0 = phi i32 [ 0, %entry ], [ %inc7, %for.inc6 ]
+  %exitcond1 = icmp ne i32 %i.0, 1024
+  br i1 %exitcond1, label %for.body, label %for.end8
+
+for.body:                                         ; preds = %for.cond
+  br label %for.cond1
+
+for.cond1:                                        ; preds = %for.inc, %for.body
+  %j.0 = phi i32 [ 0, %for.body ], [ %inc, %for.inc ]
+  %exitcond = icmp ne i32 %j.0, 1024
+  br i1 %exitcond, label %for.body3, label %for.end
+
+for.body3:                                        ; preds = %for.cond1
+  %arrayidx = getelementptr inbounds i32* %sums, i32 %i.0
+  %tmp = load i32* %arrayidx, align 4
+  %add = add nsw i32 %tmp, 5
+  store i32 %add, i32* %arrayidx, align 4
+  %add4 = add nsw i32 %i.0, 10
+  %arrayidx5 = getelementptr inbounds i32* %sums, i32 %add4
+  %tmp2 = load i32* %arrayidx5, align 4
+  %mul = mul nsw i32 %tmp2, 5
+  store i32 %mul, i32* %arrayidx5, align 4
+  br label %for.inc
+
+for.inc:                                          ; preds = %for.body3
+  %inc = add nsw i32 %j.0, 1
+  br label %for.cond1
+
+for.end:                                          ; preds = %for.cond1
+  br label %for.inc6
+
+for.inc6:                                         ; preds = %for.end
+  %inc7 = add nsw i32 %i.0, 1
+  br label %for.cond
+
+for.end8:                                         ; preds = %for.cond
+  ret void
+}
diff --git a/polly/test/ScopInfo/reduction_multiple_loops_array_sum.ll b/polly/test/ScopInfo/reduction_multiple_loops_array_sum.ll
new file mode 100644
index 00000000000..1f7013dd6d5
--- /dev/null
+++ b/polly/test/ScopInfo/reduction_multiple_loops_array_sum.ll
@@ -0,0 +1,78 @@
+; RUN: opt -basicaa %loadPolly -polly-scops -analyze < %s | FileCheck %s
+;
+; CHECK: Stmt_for_body
+; CHECK: Reduction like: 1
+; CHECK: MemRef_sum
+; CHECK: Reduction like: 1
+; CHECK: MemRef_sum
+; CHECK: Stmt_for_body3
+; CHECK: Reduction like: 0
+; CHECK: MemRef_A
+; CHECK: Reduction like: 1
+; CHECK: MemRef_sum
+; CHECK: Reduction like: 1
+; CHECK: MemRef_sum
+; CHECK: Stmt_for_end
+; CHECK: Reduction like: 1
+; CHECK: MemRef_sum
+; CHECK: Reduction like: 1
+; CHECK: MemRef_sum
+;
+; void f(int *restrict A, int *restrict sum) {
+;   int i, j;
+;   for (i = 0; i < 100; i++) {
+;     *sum *= 7;
+;     for (j = 0; j < 100; j++) {
+;       *sum += A[i + j];
+;     }
+;     *sum *= 5;
+;   }
+; }
+target datalayout = "e-m:e-p:32:32-i64:64-v128:64:128-n32-S64"
+
+define void @f(i32* noalias %A, i32* noalias %sum) {
+entry:
+  br label %for.cond
+
+for.cond:                                         ; preds = %for.inc6, %entry
+  %i.0 = phi i32 [ 0, %entry ], [ %inc7, %for.inc6 ]
+  %exitcond1 = icmp ne i32 %i.0, 100
+  br i1 %exitcond1, label %for.body, label %for.end8
+
+for.body:                                         ; preds = %for.cond
+  %tmp = load i32* %sum, align 4
+  %mul = mul nsw i32 %tmp, 7
+  store i32 %mul, i32* %sum, align 4
+  br label %for.cond1
+
+for.cond1:                                        ; preds = %for.inc, %for.body
+  %j.0 = phi i32 [ 0, %for.body ], [ %inc, %for.inc ]
+  %exitcond = icmp ne i32 %j.0, 100
+  br i1 %exitcond, label %for.body3, label %for.end
+
+for.body3:                                        ; preds = %for.cond1
+  %add = add nsw i32 %i.0, %j.0
+  %arrayidx = getelementptr inbounds i32* %A, i32 %add
+  %tmp2 = load i32* %arrayidx, align 4
+  %tmp3 = load i32* %sum, align 4
+  %add4 = add nsw i32 %tmp3, %tmp2
+  store i32 %add4, i32* %sum, align 4
+  br label %for.inc
+
+for.inc:                                          ; preds = %for.body3
+  %inc = add nsw i32 %j.0, 1
+  br label %for.cond1
+
+for.end:                                          ; preds = %for.cond1
+  %tmp4 = load i32* %sum, align 4
+  %mul5 = mul nsw i32 %tmp4, 5
+  store i32 %mul5, i32* %sum, align 4
+  br label %for.inc6
+
+for.inc6:                                         ; preds = %for.end
+  %inc7 = add nsw i32 %i.0, 1
+  br label %for.cond
+
+for.end8:                                         ; preds = %for.cond
+  ret void
+}
diff --git a/polly/test/ScopInfo/reduction_multiple_loops_array_sum_1.ll b/polly/test/ScopInfo/reduction_multiple_loops_array_sum_1.ll
new file mode 100644
index 00000000000..5148492e1e4
--- /dev/null
+++ b/polly/test/ScopInfo/reduction_multiple_loops_array_sum_1.ll
@@ -0,0 +1,72 @@
+; RUN: opt -basicaa %loadPolly -polly-scops -analyze < %s | FileCheck %s
+;
+; CHECK: Stmt_for_body
+; CHECK: Reduction like: 0
+; CHECK: MemRef_sum_04
+; CHECK: Reduction like: 0
+; CHECK: MemRef_sum_12
+; CHECK: Stmt_for_inc
+; CHECK: Reduction like: 1
+; CHECK: MemRef_sum_12
+; CHECK: Reduction like: 0
+; CHECK: MemRef_A
+; CHECK: Reduction like: 1
+; CHECK: MemRef_sum_12
+; CHECK: Stmt_for_inc5
+; CHECK: Reduction like: 0
+; CHECK: MemRef_sum_12
+; CHECK: Reduction like: 0
+; CHECK: MemRef_sum_04
+;
+; int f(int * __restrict__ A) {
+;   int i, j, sum = 1;
+;   for (i = 0; i < 100; i++) {
+;     sum *= 7;
+;     for (j = 0; j < 100; j++) {
+;       sum += A[i+j];
+;     }
+;   }
+;   return sum;
+; }
+target datalayout = "e-m:e-i64:64-f80:128-n8:16:32:64-S128"
+
+define i32 @f(i32* noalias %A) {
+entry:
+  %sum.04.reg2mem = alloca i32
+  %sum.12.reg2mem = alloca i32
+  br label %entry.split
+
+entry.split:                                      ; preds = %entry
+  store i32 0, i32* %sum.04.reg2mem
+  br label %for.body
+
+for.body:                                         ; preds = %for.inc5, %entry.split
+  %indvars.iv23 = phi i64 [ 0, %entry.split ], [ %3, %for.inc5 ]
+  %sum.04.reload = load i32* %sum.04.reg2mem
+  %mul = mul nsw i32 %sum.04.reload, 7
+  store i32 %mul, i32* %sum.12.reg2mem
+  br label %for.inc
+
+for.inc:                                          ; preds = %for.inc, %for.body
+  %indvars.iv1 = phi i64 [ 0, %for.body ], [ %1, %for.inc ]
+  %sum.12.reload = load i32* %sum.12.reg2mem
+  %0 = add i64 %indvars.iv23, %indvars.iv1
+  %arrayidx = getelementptr i32* %A, i64 %0
+  %tmp5 = load i32* %arrayidx, align 4
+  %add4 = add nsw i32 %tmp5, %sum.12.reload
+  %1 = add nuw nsw i64 %indvars.iv1, 1
+  %exitcond1 = icmp eq i64 %1, 100
+  store i32 %add4, i32* %sum.12.reg2mem
+  br i1 %exitcond1, label %for.inc5, label %for.inc
+
+for.inc5:                                         ; preds = %for.inc
+  %2 = load i32* %sum.12.reg2mem
+  %3 = add nuw nsw i64 %indvars.iv23, 1
+  %exitcond2 = icmp eq i64 %3, 100
+  store i32 %2, i32* %sum.04.reg2mem
+  br i1 %exitcond2, label %for.end7, label %for.body
+
+for.end7:                                         ; preds = %for.inc5
+  %4 = load i32* %sum.04.reg2mem
+  ret i32 %4
+}
diff --git a/polly/test/ScopInfo/reduction_multiple_simple_binary.ll b/polly/test/ScopInfo/reduction_multiple_simple_binary.ll
new file mode 100644
index 00000000000..52ddcd15a27
--- /dev/null
+++ b/polly/test/ScopInfo/reduction_multiple_simple_binary.ll
@@ -0,0 +1,98 @@
+; RUN: opt -basicaa %loadPolly -polly-scops -analyze < %s | FileCheck %s
+;
+; CHECK: ReadAccess :=       [Reduction like: 0]
+; CHECK:     { Stmt_for_body[i0] -> MemRef_A[1 + i0] };
+; CHECK: ReadAccess :=       [Reduction like: 0]
+; CHECK:     { Stmt_for_body[i0] -> MemRef_A[i0] };
+; CHECK: MustWriteAccess :=  [Reduction like: 0]
+; CHECK:     { Stmt_for_body[i0] -> MemRef_first[0] };
+; CHECK: ReadAccess :=       [Reduction like: 1]
+; CHECK:     { Stmt_for_body[i0] -> MemRef_sum[0] };
+; CHECK: MustWriteAccess :=  [Reduction like: 1]
+; CHECK:     { Stmt_for_body[i0] -> MemRef_sum[0] };
+; CHECK: ReadAccess :=       [Reduction like: 0]
+; CHECK:     { Stmt_for_body[i0] -> MemRef_A[-1 + i0] };
+; CHECK: ReadAccess :=       [Reduction like: 0]
+; CHECK:     { Stmt_for_body[i0] -> MemRef_A[i0] };
+; CHECK: MustWriteAccess :=  [Reduction like: 0]
+; CHECK:     { Stmt_for_body[i0] -> MemRef_middle[0] };
+; CHECK: ReadAccess :=       [Reduction like: 1]
+; CHECK:     { Stmt_for_body[i0] -> MemRef_prod[0] };
+; CHECK: MustWriteAccess :=  [Reduction like: 1]
+; CHECK:     { Stmt_for_body[i0] -> MemRef_prod[0] };
+; CHECK: ReadAccess :=       [Reduction like: 0]
+; CHECK:     { Stmt_for_body[i0] -> MemRef_A[-1 + i0] };
+; CHECK: ReadAccess :=       [Reduction like: 0]
+; CHECK:     { Stmt_for_body[i0] -> MemRef_A[1 + i0] };
+; CHECK: MustWriteAccess :=  [Reduction like: 0]
+; CHECK:     { Stmt_for_body[i0] -> MemRef_last[0] };
+;
+; int first, sum, middle, prod, last;
+;
+; void f(int * restrict A) {
+;   int i;
+;   for (int i = 0; i < 100; i++) {
+;     first = A[i+1] + A[i];
+;     sum += i * 3;
+;     middle = A[i-1] + A[i];
+;     prod *= (i + 3);
+;     last = A[i-1] + A[i+1];
+;   }
+; }
+target datalayout = "e-m:e-p:32:32-i64:64-v128:64:128-n32-S64"
+
+@first = common global i32 0, align 4
+@sum = common global i32 0, align 4
+@middle = common global i32 0, align 4
+@prod = common global i32 0, align 4
+@last = common global i32 0, align 4
+
+define void @f(i32* noalias %A) {
+entry:
+  br label %for.cond
+
+for.cond:                                         ; preds = %for.inc, %entry
+  %i1.0 = phi i32 [ 0, %entry ], [ %inc, %for.inc ]
+  %exitcond = icmp ne i32 %i1.0, 100
+  br i1 %exitcond, label %for.body, label %for.end
+
+for.body:                                         ; preds = %for.cond
+  %add = add nsw i32 %i1.0, 1
+  %arrayidx = getelementptr inbounds i32* %A, i32 %add
+  %tmp = load i32* %arrayidx, align 4
+  %arrayidx2 = getelementptr inbounds i32* %A, i32 %i1.0
+  %tmp1 = load i32* %arrayidx2, align 4
+  %add3 = add nsw i32 %tmp, %tmp1
+  store i32 %add3, i32* @first, align 4
+  %mul = mul nsw i32 %i1.0, 3
+  %tmp2 = load i32* @sum, align 4
+  %add4 = add nsw i32 %tmp2, %mul
+  store i32 %add4, i32* @sum, align 4
+  %sub = add nsw i32 %i1.0, -1
+  %arrayidx5 = getelementptr inbounds i32* %A, i32 %sub
+  %tmp3 = load i32* %arrayidx5, align 4
+  %arrayidx6 = getelementptr inbounds i32* %A, i32 %i1.0
+  %tmp4 = load i32* %arrayidx6, align 4
+  %add7 = add nsw i32 %tmp3, %tmp4
+  store i32 %add7, i32* @middle, align 4
+  %add8 = add nsw i32 %i1.0, 3
+  %tmp5 = load i32* @prod, align 4
+  %mul9 = mul nsw i32 %tmp5, %add8
+  store i32 %mul9, i32* @prod, align 4
+  %sub10 = add nsw i32 %i1.0, -1
+  %arrayidx11 = getelementptr inbounds i32* %A, i32 %sub10
+  %tmp6 = load i32* %arrayidx11, align 4
+  %add12 = add nsw i32 %i1.0, 1
+  %arrayidx13 = getelementptr inbounds i32* %A, i32 %add12
+  %tmp7 = load i32* %arrayidx13, align 4
+  %add14 = add nsw i32 %tmp6, %tmp7
+  store i32 %add14, i32* @last, align 4
+  br label %for.inc
+
+for.inc:                                          ; preds = %for.body
+  %inc = add nsw i32 %i1.0, 1
+  br label %for.cond
+
+for.end:                                          ; preds = %for.cond
+  ret void
+}
diff --git a/polly/test/ScopInfo/reduction_non_overlapping_chains.ll b/polly/test/ScopInfo/reduction_non_overlapping_chains.ll
new file mode 100644
index 00000000000..7cd1ba7e97d
--- /dev/null
+++ b/polly/test/ScopInfo/reduction_non_overlapping_chains.ll
@@ -0,0 +1,60 @@
+; RUN: opt %loadPolly -polly-scops -analyze < %s | FileCheck %s
+;
+; CHECK: Reduction like: 1
+; CHECK: Reduction like: 1
+; CHECK: Reduction like: 1
+; CHECK: Reduction like: 1
+;
+; void f(int *sums) {
+;   for (int i = 0; i < 1024; i++) {
+;     for (int j = 0; j < 1024; j++) {
+;       sums[i] += 5;
+;       sums[i+1024] *= 5;
+;     }
+;   }
+; }
+target datalayout = "e-m:e-p:32:32-i64:64-v128:64:128-n32-S64"
+
+define void @f(i32* %sums) {
+entry:
+  br label %for.cond
+
+for.cond:                                         ; preds = %for.inc6, %entry
+  %i.0 = phi i32 [ 0, %entry ], [ %inc7, %for.inc6 ]
+  %exitcond1 = icmp ne i32 %i.0, 1024
+  br i1 %exitcond1, label %for.body, label %for.end8
+
+for.body:                                         ; preds = %for.cond
+  br label %for.cond1
+
+for.cond1:                                        ; preds = %for.inc, %for.body
+  %j.0 = phi i32 [ 0, %for.body ], [ %inc, %for.inc ]
+  %exitcond = icmp ne i32 %j.0, 1024
+  br i1 %exitcond, label %for.body3, label %for.end
+
+for.body3:                                        ; preds = %for.cond1
+  %arrayidx = getelementptr inbounds i32* %sums, i32 %i.0
+  %tmp = load i32* %arrayidx, align 4
+  %add = add nsw i32 %tmp, 5
+  store i32 %add, i32* %arrayidx, align 4
+  %add4 = add nsw i32 %i.0, 1024
+  %arrayidx5 = getelementptr inbounds i32* %sums, i32 %add4
+  %tmp2 = load i32* %arrayidx5, align 4
+  %mul = mul nsw i32 %tmp2, 5
+  store i32 %mul, i32* %arrayidx5, align 4
+  br label %for.inc
+
+for.inc:                                          ; preds = %for.body3
+  %inc = add nsw i32 %j.0, 1
+  br label %for.cond1
+
+for.end:                                          ; preds = %for.cond1
+  br label %for.inc6
+
+for.inc6:                                         ; preds = %for.end
+  %inc7 = add nsw i32 %i.0, 1
+  br label %for.cond
+
+for.end8:                                         ; preds = %for.cond
+  ret void
+}
diff --git a/polly/test/ScopInfo/reduction_only_reduction_like_access.ll b/polly/test/ScopInfo/reduction_only_reduction_like_access.ll
index 54c9fde75e1..4eb428ff7e2 100644
--- a/polly/test/ScopInfo/reduction_only_reduction_like_access.ll
+++ b/polly/test/ScopInfo/reduction_only_reduction_like_access.ll
@@ -1,6 +1,6 @@
 ; RUN: opt %loadPolly -polly-scops -analyze < %s | FileCheck %s
 ;
-; CHECK: Reduction like: 0
+; CHECK: Reduction like: 1
 ;
 ; void f(int *sum) {
 ;   for (int i = 0; i < 100; i++)
diff --git a/polly/test/ScopInfo/reduction_two_identical_reads.ll b/polly/test/ScopInfo/reduction_two_identical_reads.ll
new file mode 100644
index 00000000000..160441bf06d
--- /dev/null
+++ b/polly/test/ScopInfo/reduction_two_identical_reads.ll
@@ -0,0 +1,66 @@
+; RUN: opt %loadPolly -polly-scops -analyze < %s | FileCheck %s
+;
+; CHECK-NOT: Reduction like: 1
+;
+; Check that we do not mark these accesses as reduction like.
+; We do this for the case the loads are modelt with the same LLVM-IR value and
+; for the case there are different LLVM-IR values.
+;
+;    void f(int *A) {
+;      for (int i = 0; i < 1024; i++)
+;        A[i] = A[i] + A[i];
+;    }
+;
+target datalayout = "e-m:e-p:32:32-i64:64-v128:64:128-n32-S64"
+
+define void @f_one_load_case(i32* %A) {
+entry:
+  br label %for.cond
+
+for.cond:                                         ; preds = %for.inc, %entry
+  %i.0 = phi i32 [ 0, %entry ], [ %inc, %for.inc ]
+  %exitcond = icmp ne i32 %i.0, 1024
+  br i1 %exitcond, label %for.body, label %for.end
+
+for.body:                                         ; preds = %for.cond
+  %arrayidx = getelementptr inbounds i32* %A, i32 %i.0
+  %tmp = load i32* %arrayidx, align 4
+  %add = add nsw i32 %tmp, %tmp
+  %arrayidx2 = getelementptr inbounds i32* %A, i32 %i.0
+  store i32 %add, i32* %arrayidx2, align 4
+  br label %for.inc
+
+for.inc:                                          ; preds = %for.body
+  %inc = add nsw i32 %i.0, 1
+  br label %for.cond
+
+for.end:                                          ; preds = %for.cond
+  ret void
+}
+
+define void @f_two_loads_case(i32* %A) {
+entry:
+  br label %for.cond
+
+for.cond:                                         ; preds = %for.inc, %entry
+  %i.0 = phi i32 [ 0, %entry ], [ %inc, %for.inc ]
+  %exitcond = icmp ne i32 %i.0, 1024
+  br i1 %exitcond, label %for.body, label %for.end
+
+for.body:                                         ; preds = %for.cond
+  %arrayidx = getelementptr inbounds i32* %A, i32 %i.0
+  %tmp = load i32* %arrayidx, align 4
+  %arrayidxCopy = getelementptr inbounds i32* %A, i32 %i.0
+  %tmpCopy = load i32* %arrayidxCopy, align 4
+  %add = add nsw i32 %tmp, %tmpCopy
+  %arrayidx2 = getelementptr inbounds i32* %A, i32 %i.0
+  store i32 %add, i32* %arrayidx2, align 4
+  br label %for.inc
+
+for.inc:                                          ; preds = %for.body
+  %inc = add nsw i32 %i.0, 1
+  br label %for.cond
+
+for.end:                                          ; preds = %for.cond
+  ret void
+}