diff options
22 files changed, 1307 insertions, 26 deletions
diff --git a/polly/include/polly/ScopInfo.h b/polly/include/polly/ScopInfo.h index b91713a6e8b..31f08402e32 100644 --- a/polly/include/polly/ScopInfo.h +++ b/polly/include/polly/ScopInfo.h @@ -324,7 +324,14 @@ class ScopStmt { __isl_give isl_set *buildDomain(TempScop &tempScop, const Region &CurRegion); void buildScattering(SmallVectorImpl<unsigned> &Scatter); void buildAccesses(TempScop &tempScop, const Region &CurRegion); - void checkForReduction(); + + /// @brief Detect and mark reductions in the ScopStmt + void checkForReductions(); + + /// @brief Collect loads which might form a reduction chain with @p StoreMA + void + collectCandiateReductionLoads(MemoryAccess *StoreMA, + llvm::SmallVectorImpl<MemoryAccess *> &Loads); //@} /// Create the ScopStmt from a BasicBlock. diff --git a/polly/lib/Analysis/Dependences.cpp b/polly/lib/Analysis/Dependences.cpp index 0390409ae3c..a9ae4b3d75a 100644 --- a/polly/lib/Analysis/Dependences.cpp +++ b/polly/lib/Analysis/Dependences.cpp @@ -308,7 +308,6 @@ void Dependences::calculateDependences(Scop &S) { isl_map *Identity = isl_map_from_domain_and_range(isl_set_copy(AccDomW), AccDomW); RED = isl_union_map_add_map(RED, Identity); - break; } } diff --git a/polly/lib/Analysis/ScopInfo.cpp b/polly/lib/Analysis/ScopInfo.cpp index d2c8fbf876d..1521b3bf147 100644 --- a/polly/lib/Analysis/ScopInfo.cpp +++ b/polly/lib/Analysis/ScopInfo.cpp @@ -706,34 +706,36 @@ ScopStmt::ScopStmt(Scop &parent, TempScop &tempScop, const Region &CurRegion, Domain = buildDomain(tempScop, CurRegion); buildScattering(Scatter); buildAccesses(tempScop, CurRegion); - checkForReduction(); -} - -void ScopStmt::checkForReduction() { - // Skip statements with more than one binary reduction - if (MemAccs.size() != 2) - return; - - // Skip if there is not exactly one load and one store - unsigned LoadIdx = MemAccs[0]->isRead() ? 0 : 1; - auto *Load = dyn_cast<LoadInst>(MemAccs[LoadIdx]->getAccessInstruction()); - auto *Store = - dyn_cast<StoreInst>(MemAccs[1 - LoadIdx]->getAccessInstruction()); - if (!Load || !Store || - Load->getPointerOperand() != Store->getPointerOperand()) + checkForReductions(); +} + +/// @brief Collect loads which might form a reduction chain with @p StoreMA +/// +/// Check if the stored value for @p StoreMA is a binary operator with one or +/// two loads as operands. If the binary operand is commutative & associative, +/// used only once (by @p StoreMA) and its load operands are also used only +/// once, we have found a possible reduction chain. It starts at an operand +/// load and includes the binary operator and @p StoreMA. +/// +/// Note: We allow only one use to ensure the load and binary operator cannot +/// escape this block or into any other store except @p StoreMA. +void ScopStmt::collectCandiateReductionLoads( + MemoryAccess *StoreMA, SmallVectorImpl<MemoryAccess *> &Loads) { + auto *Store = dyn_cast<StoreInst>(StoreMA->getAccessInstruction()); + if (!Store) return; // Skip if there is not one binary operator between the load and the store auto *BinOp = dyn_cast<BinaryOperator>(Store->getValueOperand()); - if (!BinOp || (BinOp->getOperand(0) != Load && BinOp->getOperand(1) != Load)) + if (!BinOp) return; - // Skip if the opcode of the binary operator is not commutative/associative - if (!BinOp->isCommutative() || !BinOp->isAssociative()) + // Skip if the binary operators has multiple uses + if (BinOp->getNumUses() != 1) return; - // Skip if the load has multiple uses - if (Load->getNumUses() != 1) + // Skip if the opcode of the binary operator is not commutative/associative + if (!BinOp->isCommutative() || !BinOp->isAssociative()) return; // Skip if it is a multiplicative reduction and we disabled them @@ -742,9 +744,87 @@ void ScopStmt::checkForReduction() { BinOp->getOpcode() == Instruction::FMul)) return; - // Valid reduction like access - MemAccs[0]->markReductionLike(); - MemAccs[1]->markReductionLike(); + // Check the binary operator operands for a candidate load + auto *PossibleLoad0 = dyn_cast<LoadInst>(BinOp->getOperand(0)); + auto *PossibleLoad1 = dyn_cast<LoadInst>(BinOp->getOperand(1)); + if (!PossibleLoad0 && !PossibleLoad1) + return; + + // A load is only a candidate if it cannot escape (thus has only this use) + if (PossibleLoad0 && PossibleLoad0->getNumUses() == 1) + Loads.push_back(lookupAccessFor(PossibleLoad0)); + if (PossibleLoad1 && PossibleLoad1->getNumUses() == 1) + Loads.push_back(lookupAccessFor(PossibleLoad1)); +} + +/// @brief Check for reductions in this ScopStmt +/// +/// Iterate over all store memory accesses and check for valid binary reduction +/// like chains. For all candidates we check if they have the same base address +/// and there are no other accesses which overlap with them. The base address +/// check rules out impossible reductions candidates early. The overlap check, +/// together with the "only one user" check in collectCandiateReductionLoads, +/// guarantees that none of the intermediate results will escape during +/// execution of the loop nest. We basically check here that no other memory +/// access can access the same memory as the potential reduction. +void ScopStmt::checkForReductions() { + SmallVector<MemoryAccess *, 2> Loads; + SmallVector<std::pair<MemoryAccess *, MemoryAccess *>, 4> Candidates; + + // First collect candidate load-store reduction chains by iterating over all + // stores and collecting possible reduction loads. + for (MemoryAccess *StoreMA : MemAccs) { + if (StoreMA->isRead()) + continue; + + Loads.clear(); + collectCandiateReductionLoads(StoreMA, Loads); + for (MemoryAccess *LoadMA : Loads) + Candidates.push_back(std::make_pair(LoadMA, StoreMA)); + } + + // Then check each possible candidate pair. + for (const auto &CandidatePair : Candidates) { + bool Valid = true; + isl_map *LoadAccs = CandidatePair.first->getAccessRelation(); + isl_map *StoreAccs = CandidatePair.second->getAccessRelation(); + + // Skip those with obviously unequal base addresses. + if (!isl_map_has_equal_space(LoadAccs, StoreAccs)) { + isl_map_free(LoadAccs); + isl_map_free(StoreAccs); + continue; + } + + // And check if the remaining for overlap with other memory accesses. + isl_map *AllAccsRel = isl_map_union(LoadAccs, StoreAccs); + AllAccsRel = isl_map_intersect_domain(AllAccsRel, getDomain()); + isl_set *AllAccs = isl_map_range(AllAccsRel); + + for (MemoryAccess *MA : MemAccs) { + if (MA == CandidatePair.first || MA == CandidatePair.second) + continue; + + isl_map *AccRel = + isl_map_intersect_domain(MA->getAccessRelation(), getDomain()); + isl_set *Accs = isl_map_range(AccRel); + + if (isl_set_has_equal_space(AllAccs, Accs) || isl_set_free(Accs)) { + isl_set *OverlapAccs = isl_set_intersect(Accs, isl_set_copy(AllAccs)); + Valid = Valid && isl_set_is_empty(OverlapAccs); + isl_set_free(OverlapAccs); + } + } + + isl_set_free(AllAccs); + if (!Valid) + continue; + + // If no overlapping access was found we mark the load and store as + // reduction like. + CandidatePair.first->markReductionLike(); + CandidatePair.second->markReductionLike(); + } } std::string ScopStmt::getDomainStr() const { return stringFromIslObj(Domain); } diff --git a/polly/test/Dependences/reduction_complex_location.ll b/polly/test/Dependences/reduction_complex_location.ll new file mode 100644 index 00000000000..8ed46b7aaba --- /dev/null +++ b/polly/test/Dependences/reduction_complex_location.ll @@ -0,0 +1,59 @@ +; RUN: opt -basicaa %loadPolly -polly-dependences -analyze < %s | FileCheck %s +; +; CHECK: RAW dependences: +; CHECK: { } +; CHECK: WAR dependences: +; CHECK: { } +; CHECK: WAW dependences: +; CHECK: { } +; CHECK: Reduction dependences: +; CHECK: { Stmt_for_body3[i0, i1] -> Stmt_for_body3[2 + i0, -1 + i1] : i0 <= 97 and i0 >= 0 and i1 <= 99 and i1 >= 1 } +; +; void f(int *sum) { +; for (int i = 0; i < 100; i++) +; for (int j = 0; j < 100; j++) +; sum[i+j*2] += j * i; +; } +target datalayout = "e-m:e-p:32:32-i64:64-v128:64:128-n32-S64" + +define void @f(i32* %sum) { +entry: + br label %for.cond + +for.cond: ; preds = %for.inc6, %entry + %i.0 = phi i32 [ 0, %entry ], [ %inc7, %for.inc6 ] + %exitcond1 = icmp ne i32 %i.0, 100 + br i1 %exitcond1, label %for.body, label %for.end8 + +for.body: ; preds = %for.cond + br label %for.cond1 + +for.cond1: ; preds = %for.inc, %for.body + %j.0 = phi i32 [ 0, %for.body ], [ %inc, %for.inc ] + %exitcond = icmp ne i32 %j.0, 100 + br i1 %exitcond, label %for.body3, label %for.end + +for.body3: ; preds = %for.cond1 + %mul = mul nsw i32 %j.0, %i.0 + %mul4 = shl nsw i32 %j.0, 1 + %add = add nsw i32 %i.0, %mul4 + %arrayidx = getelementptr inbounds i32* %sum, i32 %add + %tmp = load i32* %arrayidx, align 4 + %add5 = add nsw i32 %tmp, %mul + store i32 %add5, i32* %arrayidx, align 4 + br label %for.inc + +for.inc: ; preds = %for.body3 + %inc = add nsw i32 %j.0, 1 + br label %for.cond1 + +for.end: ; preds = %for.cond1 + br label %for.inc6 + +for.inc6: ; preds = %for.end + %inc7 = add nsw i32 %i.0, 1 + br label %for.cond + +for.end8: ; preds = %for.cond + ret void +} diff --git a/polly/test/Dependences/reduction_mixed_reduction_and_non_reduction_dependences.ll b/polly/test/Dependences/reduction_mixed_reduction_and_non_reduction_dependences.ll new file mode 100644 index 00000000000..248d7b8c361 --- /dev/null +++ b/polly/test/Dependences/reduction_mixed_reduction_and_non_reduction_dependences.ll @@ -0,0 +1,60 @@ +; RUN: opt %loadPolly -polly-dependences -analyze < %s | FileCheck %s +; +; CHECK: RAW dependences: +; CHECK-DAG: Stmt_for_body3[i0, 1] -> Stmt_for_body3[1 + i0, o1] : i0 <= 1022 and i0 >= 0 and o1 <= 511 and o1 >= 1 +; CHECK-DAG: Stmt_for_body3[i0, 0] -> Stmt_for_body3[i0, o1] : i0 <= 1023 and i0 >= 0 and o1 <= 511 and o1 >= 1 +; CHECK: WAR dependences: +; CHECK: { } +; CHECK: WAW dependences: +; CHECK-DAG: Stmt_for_body3[i0, i1] -> Stmt_for_body3[1 + i0, -1 + i1] : i0 <= 1022 and i0 >= 0 and i1 <= 511 and i1 >= 2 +; CHECK-DAG: Stmt_for_body3[i0, 2] -> Stmt_for_body3[2 + i0, 0] : i0 <= 1021 and i0 >= 0 +; CHECK: Reduction dependences: +; CHECK: { Stmt_for_body3[i0, 1] -> Stmt_for_body3[1 + i0, 0] : i0 >= 0 and i0 <= 1022 } +; +; void f(int *sum) { +; for (int i = 0; i < 1024; i++) +; for (int j = 0; j < 512; j++) +; sum[i + j] = sum[i] + 3; +; } +target datalayout = "e-m:e-p:32:32-i64:64-v128:64:128-n32-S64" + +define void @f(i32* %sum) { +entry: + br label %for.cond + +for.cond: ; preds = %for.inc6, %entry + %i.0 = phi i32 [ 0, %entry ], [ %inc7, %for.inc6 ] + %exitcond1 = icmp ne i32 %i.0, 1024 + br i1 %exitcond1, label %for.body, label %for.end8 + +for.body: ; preds = %for.cond + br label %for.cond1 + +for.cond1: ; preds = %for.inc, %for.body + %j.0 = phi i32 [ 0, %for.body ], [ %inc, %for.inc ] + %exitcond = icmp ne i32 %j.0, 512 + br i1 %exitcond, label %for.body3, label %for.end + +for.body3: ; preds = %for.cond1 + %arrayidx = getelementptr inbounds i32* %sum, i32 %i.0 + %tmp = load i32* %arrayidx, align 4 + %add = add nsw i32 %tmp, 3 + %add4 = add nsw i32 %i.0, %j.0 + %arrayidx5 = getelementptr inbounds i32* %sum, i32 %add4 + store i32 %add, i32* %arrayidx5, align 4 + br label %for.inc + +for.inc: ; preds = %for.body3 + %inc = add nsw i32 %j.0, 1 + br label %for.cond1 + +for.end: ; preds = %for.cond1 + br label %for.inc6 + +for.inc6: ; preds = %for.end + %inc7 = add nsw i32 %i.0, 1 + br label %for.cond + +for.end8: ; preds = %for.cond + ret void +} diff --git a/polly/test/Dependences/reduction_multiple_loops_array_sum.ll b/polly/test/Dependences/reduction_multiple_loops_array_sum.ll new file mode 100644 index 00000000000..20903fbfac7 --- /dev/null +++ b/polly/test/Dependences/reduction_multiple_loops_array_sum.ll @@ -0,0 +1,77 @@ +; RUN: opt -basicaa %loadPolly -polly-dependences -analyze < %s | FileCheck %s +; +; Verify that only the inner reduction like accesses cause reduction dependences +; +; CHECK: Reduction dependences: +; CHECK: { Stmt_for_body3[i0, i1] -> Stmt_for_body3[i0, 1 + i1] : i0 <= 99 and i0 >= 0 and i1 <= 98 and i1 >= 0 } +; +; void f(int * restrict A, int * restrict sum) { +; int i, j, k; +; for (i = 0; i < 100; i++) { +; *sum *= 7; +; for (j = 0; j < 100; j++) { +; *sum += A[i+j]; +; for (k = 0; k< 100; k++) {} +; } +; } +; } +target datalayout = "e-m:e-p:32:32-i64:64-v128:64:128-n32-S64" + +define void @f(i32* noalias %A, i32* noalias %sum) { +entry: + br label %for.cond + +for.cond: ; preds = %for.inc11, %entry + %i.0 = phi i32 [ 0, %entry ], [ %inc12, %for.inc11 ] + %exitcond2 = icmp ne i32 %i.0, 100 + br i1 %exitcond2, label %for.body, label %for.end13 + +for.body: ; preds = %for.cond + %tmp = load i32* %sum, align 4 + %mul = mul nsw i32 %tmp, 7 + store i32 %mul, i32* %sum, align 4 + br label %for.cond1 + +for.cond1: ; preds = %for.inc8, %for.body + %j.0 = phi i32 [ 0, %for.body ], [ %inc9, %for.inc8 ] + %exitcond1 = icmp ne i32 %j.0, 100 + br i1 %exitcond1, label %for.body3, label %for.end10 + +for.body3: ; preds = %for.cond1 + %add = add nsw i32 %i.0, %j.0 + %arrayidx = getelementptr inbounds i32* %A, i32 %add + %tmp3 = load i32* %arrayidx, align 4 + %tmp4 = load i32* %sum, align 4 + %add4 = add nsw i32 %tmp4, %tmp3 + store i32 %add4, i32* %sum, align 4 + br label %for.cond5 + +for.cond5: ; preds = %for.inc, %for.body3 + %k.0 = phi i32 [ 0, %for.body3 ], [ %inc, %for.inc ] + %exitcond = icmp ne i32 %k.0, 100 + br i1 %exitcond, label %for.body7, label %for.end + +for.body7: ; preds = %for.cond5 + br label %for.inc + +for.inc: ; preds = %for.body7 + %inc = add nsw i32 %k.0, 1 + br label %for.cond5 + +for.end: ; preds = %for.cond5 + br label %for.inc8 + +for.inc8: ; preds = %for.end + %inc9 = add nsw i32 %j.0, 1 + br label %for.cond1 + +for.end10: ; preds = %for.cond1 + br label %for.inc11 + +for.inc11: ; preds = %for.end10 + %inc12 = add nsw i32 %i.0, 1 + br label %for.cond + +for.end13: ; preds = %for.cond + ret void +} diff --git a/polly/test/Dependences/reduction_multiple_loops_array_sum_2.ll b/polly/test/Dependences/reduction_multiple_loops_array_sum_2.ll new file mode 100644 index 00000000000..33982921f20 --- /dev/null +++ b/polly/test/Dependences/reduction_multiple_loops_array_sum_2.ll @@ -0,0 +1,79 @@ +; RUN: opt %loadPolly -polly-dependences -analyze -basicaa < %s | FileCheck %s +; +; CHECK: RAW dependences: +; CHECK: { } +; CHECK: WAR dependences: +; CHECK: { } +; CHECK: WAW dependences: +; CHECK: { } +; CHECK: Reduction dependences: +; CHECK-DAG: Stmt_for_body3[i0, i1] -> Stmt_for_body3[i0, 1 + i1] : i0 <= 99 and i0 >= 0 and i1 <= 98 and i1 >= 0 +; CHECK-DAG: Stmt_for_body3[i0, 99] -> Stmt_for_body3[1 + i0, 0] : i0 <= 98 and i0 >= 0 +; +; int f(int * restrict A, int * restrict sum) { +; int i, j, k; +; for (i = 0; i < 100; i++) { +; for (j = 0; j < 100; j++) { +; sum += A[i+j]; +; for (k = 0; k< 100; k++) {} +; } +; } +; return sum; +; } +target datalayout = "e-m:e-p:32:32-i64:64-v128:64:128-n32-S64" + +define void @f(i32* noalias %A, i32* noalias %sum) { +entry: + br label %for.cond + +for.cond: ; preds = %for.inc11, %entry + %i.0 = phi i32 [ 0, %entry ], [ %inc12, %for.inc11 ] + %exitcond2 = icmp ne i32 %i.0, 100 + br i1 %exitcond2, label %for.body, label %for.end13 + +for.body: ; preds = %for.cond + br label %for.cond1 + +for.cond1: ; preds = %for.inc8, %for.body + %j.0 = phi i32 [ 0, %for.body ], [ %inc9, %for.inc8 ] + %exitcond1 = icmp ne i32 %j.0, 100 + br i1 %exitcond1, label %for.body3, label %for.end10 + +for.body3: ; preds = %for.cond1 + %add = add nsw i32 %i.0, %j.0 + %arrayidx = getelementptr inbounds i32* %A, i32 %add + %tmp3 = load i32* %arrayidx, align 4 + %tmp4 = load i32* %sum, align 4 + %add4 = add nsw i32 %tmp4, %tmp3 + store i32 %add4, i32* %sum, align 4 + br label %for.cond5 + +for.cond5: ; preds = %for.inc, %for.body3 + %k.0 = phi i32 [ 0, %for.body3 ], [ %inc, %for.inc ] + %exitcond = icmp ne i32 %k.0, 100 + br i1 %exitcond, label %for.body7, label %for.end + +for.body7: ; preds = %for.cond5 + br label %for.inc + +for.inc: ; preds = %for.body7 + %inc = add nsw i32 %k.0, 1 + br label %for.cond5 + +for.end: ; preds = %for.cond5 + br label %for.inc8 + +for.inc8: ; preds = %for.end + %inc9 = add nsw i32 %j.0, 1 + br label %for.cond1 + +for.end10: ; preds = %for.cond1 + br label %for.inc11 + +for.inc11: ; preds = %for.end10 + %inc12 = add nsw i32 %i.0, 1 + br label %for.cond + +for.end13: ; preds = %for.cond + ret void +} diff --git a/polly/test/Dependences/reduction_multiple_loops_array_sum_3.ll b/polly/test/Dependences/reduction_multiple_loops_array_sum_3.ll new file mode 100644 index 00000000000..4b699a1ea05 --- /dev/null +++ b/polly/test/Dependences/reduction_multiple_loops_array_sum_3.ll @@ -0,0 +1,73 @@ +; RUN: opt %loadPolly -polly-dependences -analyze -basicaa < %s | FileCheck %s +; +; CHECK: Reduction dependences: +; CHECK: { Stmt_for_inc[i0, i1] -> Stmt_for_inc[i0, 1 + i1] : i0 <= 99 and i0 >= 0 and i1 <= 98 and i1 >= 0 } +; +; int f(int * __restrict__ A) { +; int i, j, sum = 0; +; for (k = 0; k < 37; k = g(k)) { +; for (i = 0; i < 100; i++) { +; sum *= 2; +; for (j = 0; j < 100; j++) { +; sum += A[i+j]; +; } +; } +; } +; return sum; +; } +target datalayout = "e-m:e-i64:64-f80:128-n8:16:32:64-S128" + +declare i64 @g(i64) + +define i32 @f(i32* noalias %A) { +entry: + %sum.04.reg2mem = alloca i32 + %sum.12.reg2mem = alloca i32 + br label %entry.split + +entry.split: ; preds = %entry + store i32 0, i32* %sum.04.reg2mem + br label %for.body_outer_split + +for.body_outer_split: ; preds = %entry.split, %for.inc5 + %indvars.ivK = phi i64 [ 0, %entry.split ], [ %incK, %for.bos2 ] + br label %for.body_outer + +for.body_outer: ; preds = %for.body_outer_split + %incK = call i64 @g(i64 %indvars.ivK) + %exitcondK = icmp eq i64 %incK, 100 + br i1 %exitcondK, label %for.end7, label %for.body + +for.body: ; preds = %for.inc5, %for.body_outer + %indvars.iv23 = phi i64 [ 0, %for.body_outer ], [ %3, %for.inc5 ] + %sum.04.reload = load i32* %sum.04.reg2mem + %mul = shl nsw i32 %sum.04.reload, 1 + store i32 %mul, i32* %sum.12.reg2mem + br label %for.inc + +for.inc: ; preds = %for.inc, %for.body + %indvars.iv1 = phi i64 [ 0, %for.body ], [ %1, %for.inc ] + %sum.12.reload = load i32* %sum.12.reg2mem + %0 = add i64 %indvars.iv23, %indvars.iv1 + %arrayidx = getelementptr i32* %A, i64 %0 + %tmp5 = load i32* %arrayidx, align 4 + %add4 = add nsw i32 %tmp5, %sum.12.reload + %1 = add nuw nsw i64 %indvars.iv1, 1 + %exitcond1 = icmp eq i64 %1, 100 + store i32 %add4, i32* %sum.12.reg2mem + br i1 %exitcond1, label %for.inc5, label %for.inc + +for.inc5: ; preds = %for.inc + %2 = load i32* %sum.12.reg2mem + %3 = add nuw nsw i64 %indvars.iv23, 1 + %exitcond2 = icmp eq i64 %3, 100 + store i32 %2, i32* %sum.04.reg2mem + br i1 %exitcond2, label %for.bos2, label %for.body + +for.bos2: + br label %for.body_outer_split + +for.end7: ; preds = %for.inc5 + %4 = load i32* %sum.04.reg2mem + ret i32 %4 +} diff --git a/polly/test/Dependences/reduction_partially_escaping_intermediate_in_other_stmt.ll b/polly/test/Dependences/reduction_partially_escaping_intermediate_in_other_stmt.ll new file mode 100644 index 00000000000..c48d8f07ecc --- /dev/null +++ b/polly/test/Dependences/reduction_partially_escaping_intermediate_in_other_stmt.ll @@ -0,0 +1,69 @@ +; RUN: opt %loadPolly -polly-dependences -analyze -basicaa < %s | FileCheck %s +; +; CHECK: Reduction dependences: +; CHECK: [N] -> { Stmt_for_body3[i0, i1] -> Stmt_for_body3[i0, 1 + i1] : i0 <= 1023 and i0 >= 0 and i1 <= 1022 and i1 >= 0 and i1 >= 1024 - N + i0 } +; +; void f(int N, int * restrict sums, int * restrict escape) { +; for (int i = 0; i < 1024; i++) { +; for (int j = 0; j < 1024; j++) { +; sums[i] += 5; +; if (N - i + j < 1024) +; escape[N - i + j] = sums[i]; +; } +; } +; } +target datalayout = "e-m:e-p:32:32-i64:64-v128:64:128-n32-S64" + +define void @f(i32 %N, i32* noalias %sums, i32* noalias %escape) { +entry: + br label %for.cond + +for.cond: ; preds = %for.inc10, %entry + %i.0 = phi i32 [ 0, %entry ], [ %inc11, %for.inc10 ] + %exitcond1 = icmp ne i32 %i.0, 1024 + br i1 %exitcond1, label %for.body, label %for.end12 + +for.body: ; preds = %for.cond + br label %for.cond1 + +for.cond1: ; preds = %for.inc, %for.body + %j.0 = phi i32 [ 0, %for.body ], [ %inc, %for.inc ] + %exitcond = icmp ne i32 %j.0, 1024 + br i1 %exitcond, label %for.body3, label %for.end + +for.body3: ; preds = %for.cond1 + %arrayidx = getelementptr inbounds i32* %sums, i32 %i.0 + %tmp = load i32* %arrayidx, align 4 + %add = add nsw i32 %tmp, 5 + store i32 %add, i32* %arrayidx, align 4 + %sub = sub nsw i32 %N, %i.0 + %add4 = add nsw i32 %sub, %j.0 + %cmp5 = icmp slt i32 %add4, 1024 + br i1 %cmp5, label %if.then, label %if.end + +if.then: ; preds = %for.body3 + %arrayidx6 = getelementptr inbounds i32* %sums, i32 %i.0 + %tmp2 = load i32* %arrayidx6, align 4 + %sub7 = sub nsw i32 %N, %i.0 + %add8 = add nsw i32 %sub7, %j.0 + %arrayidx9 = getelementptr inbounds i32* %escape, i32 %add8 + store i32 %tmp2, i32* %arrayidx9, align 4 + br label %if.end + +if.end: ; preds = %if.then, %for.body3 + br label %for.inc + +for.inc: ; preds = %if.end + %inc = add nsw i32 %j.0, 1 + br label %for.cond1 + +for.end: ; preds = %for.cond1 + br label %for.inc10 + +for.inc10: ; preds = %for.end + %inc11 = add nsw i32 %i.0, 1 + br label %for.cond + +for.end12: ; preds = %for.cond + ret void +} diff --git a/polly/test/Dependences/reduction_two_reductions_different_rloops.ll b/polly/test/Dependences/reduction_two_reductions_different_rloops.ll new file mode 100644 index 00000000000..4470e4e0484 --- /dev/null +++ b/polly/test/Dependences/reduction_two_reductions_different_rloops.ll @@ -0,0 +1,73 @@ +; RUN: opt %loadPolly -basicaa -polly-dependences -analyze < %s | FileCheck %s +; +; CHECK: RAW dependences: +; CHECK: { } +; CHECK: WAR dependences: +; CHECK: { } +; CHECK: WAW dependences: +; CHECK: { } +; CHECK: Reduction dependences: +; CHECK-DAG: Stmt_for_body3[i0, i1] -> Stmt_for_body3[i0, 1 + i1] : i0 <= 1023 and i0 >= 0 and i1 <= 1022 and i1 >= 0 +; CHECK-DAG: Stmt_for_body3[i0, i1] -> Stmt_for_body3[1 + i0, i1] : i0 <= 1022 and i0 >= 0 and i1 <= 1023 and i1 >= 0 +; +; void f(int *restrict A, int *restrict B, int *restrict Values) { +; for (int i = 0; i < 1024; i++) { +; for (int j = 0; j < 1024; j++) { +; A[i] += Values[i + j - 1]; +; B[j] += Values[i + j + 42]; +; } +; } +; } +; +target datalayout = "e-m:e-p:32:32-i64:64-v128:64:128-n32-S64" + +define void @f(i32* noalias %A, i32* noalias %B, i32* noalias %Values) { +entry: + br label %for.cond + +for.cond: ; preds = %for.inc11, %entry + %i.0 = phi i32 [ 0, %entry ], [ %inc12, %for.inc11 ] + %exitcond1 = icmp ne i32 %i.0, 1024 + br i1 %exitcond1, label %for.body, label %for.end13 + +for.body: ; preds = %for.cond + br label %for.cond1 + +for.cond1: ; preds = %for.inc, %for.body + %j.0 = phi i32 [ 0, %for.body ], [ %inc, %for.inc ] + %exitcond = icmp ne i32 %j.0, 1024 + br i1 %exitcond, label %for.body3, label %for.end + +for.body3: ; preds = %for.cond1 + %add = add nsw i32 %i.0, %j.0 + %sub = add nsw i32 %add, -1 + %arrayidx = getelementptr inbounds i32* %Values, i32 %sub + %tmp = load i32* %arrayidx, align 4 + %arrayidx4 = getelementptr inbounds i32* %A, i32 %i.0 + %tmp2 = load i32* %arrayidx4, align 4 + %add5 = add nsw i32 %tmp2, %tmp + store i32 %add5, i32* %arrayidx4, align 4 + %add6 = add nsw i32 %i.0, %j.0 + %add7 = add nsw i32 %add6, 42 + %arrayidx8 = getelementptr inbounds i32* %Values, i32 %add7 + %tmp3 = load i32* %arrayidx8, align 4 + %arrayidx9 = getelementptr inbounds i32* %B, i32 %j.0 + %tmp4 = load i32* %arrayidx9, align 4 + %add10 = add nsw i32 %tmp4, %tmp3 + store i32 %add10, i32* %arrayidx9, align 4 + br label %for.inc + +for.inc: ; preds = %for.body3 + %inc = add nsw i32 %j.0, 1 + br label %for.cond1 + +for.end: ; preds = %for.cond1 + br label %for.inc11 + +for.inc11: ; preds = %for.end + %inc12 = add nsw i32 %i.0, 1 + br label %for.cond + +for.end13: ; preds = %for.cond + ret void +} diff --git a/polly/test/ScopInfo/reduction_alternating_base.ll b/polly/test/ScopInfo/reduction_alternating_base.ll new file mode 100644 index 00000000000..ce98f01c4aa --- /dev/null +++ b/polly/test/ScopInfo/reduction_alternating_base.ll @@ -0,0 +1,38 @@ +; RUN: opt %loadPolly -polly-scops -analyze < %s | FileCheck %s +; +; FIXME: We cannot detect this SCoP yet but as soon as we can we should check +; that the reduction is detected! +; +; CHECK-NOT: Scattering +; +; void f(int *A) { +; for (int i = 0; i < 1024; i++) +; A[i % 2] += i; +; } +; +target datalayout = "e-m:e-p:32:32-i64:64-v128:64:128-n32-S64" + +define void @f(i32* %A) { +entry: + br label %for.cond + +for.cond: ; preds = %for.inc, %entry + %i.0 = phi i32 [ 0, %entry ], [ %inc, %for.inc ] + %exitcond = icmp ne i32 %i.0, 1024 + br i1 %exitcond, label %for.body, label %for.end + +for.body: ; preds = %for.cond + %rem = srem i32 %i.0, 2 + %arrayidx = getelementptr inbounds i32* %A, i32 %rem + %tmp = load i32* %arrayidx, align 4 + %add = add nsw i32 %tmp, %i.0 + store i32 %add, i32* %arrayidx, align 4 + br label %for.inc + +for.inc: ; preds = %for.body + %inc = add nsw i32 %i.0, 1 + br label %for.cond + +for.end: ; preds = %for.cond + ret void +} diff --git a/polly/test/ScopInfo/reduction_disabled_multiplicative.ll b/polly/test/ScopInfo/reduction_disabled_multiplicative.ll new file mode 100644 index 00000000000..159e7f53305 --- /dev/null +++ b/polly/test/ScopInfo/reduction_disabled_multiplicative.ll @@ -0,0 +1,52 @@ +; RUN: opt -basicaa %loadPolly -polly-scops -analyze -polly-disable-multiplicative-reductions < %s | FileCheck %s +; +; CHECK: ReadAccess := [Reduction like: 1] +; CHECK: { Stmt_for_body[i0] -> MemRef_sum[0] }; +; CHECK: MustWriteAccess := [Reduction like: 1] +; CHECK: { Stmt_for_body[i0] -> MemRef_sum[0] }; +; CHECK: ReadAccess := [Reduction like: 0] +; CHECK: { Stmt_for_body[i0] -> MemRef_prod[0] }; +; CHECK: MustWriteAccess := [Reduction like: 0] +; CHECK: { Stmt_for_body[i0] -> MemRef_prod[0] }; +; +; int sum, prod; +; +; void f() { +; int i; +; for (int i = 0; i < 100; i++) { +; sum += i * 3; +; prod *= (i + 3); +; } +; } +target datalayout = "e-m:e-p:32:32-i64:64-v128:64:128-n32-S64" + +@sum = common global i32 0, align 4 +@prod = common global i32 0, align 4 + +define void @f() #0 { +entry: + br label %for.cond + +for.cond: ; preds = %for.inc, %entry + %i1.0 = phi i32 [ 0, %entry ], [ %inc, %for.inc ] + %exitcond = icmp ne i32 %i1.0, 100 + br i1 %exitcond, label %for.body, label %for.end + +for.body: ; preds = %for.cond + %mul = mul nsw i32 %i1.0, 3 + %tmp = load i32* @sum, align 4 + %add = add nsw i32 %tmp, %mul + store i32 %add, i32* @sum, align 4 + %add2 = add nsw i32 %i1.0, 3 + %tmp1 = load i32* @prod, align 4 + %mul3 = mul nsw i32 %tmp1, %add2 + store i32 %mul3, i32* @prod, align 4 + br label %for.inc + +for.inc: ; preds = %for.body + %inc = add nsw i32 %i1.0, 1 + br label %for.cond + +for.end: ; preds = %for.cond + ret void +} diff --git a/polly/test/ScopInfo/reduction_escaping_intermediate.ll b/polly/test/ScopInfo/reduction_escaping_intermediate.ll new file mode 100644 index 00000000000..c5f5cc03928 --- /dev/null +++ b/polly/test/ScopInfo/reduction_escaping_intermediate.ll @@ -0,0 +1,62 @@ +; RUN: opt %loadPolly -basicaa -polly-scops -analyze < %s | FileCheck %s +; +; void f(int N, int * restrict sums, int * restrict escape) { +; int i, j; +; for (i = 0; i < 1024; i++) { +; for (j = 0; j < 1024; j++) { +; sums[i] += 5; +; escape[N - i + j] = sums[i]; +; } +; } +; } +; +; CHECK: Reduction like: 0 +; CHECK: sums +; CHECK: Reduction like: 0 +; CHECK: sums +; CHECK: Reduction like: 0 +; CHECK: escape +target datalayout = "e-m:e-p:32:32-i64:64-v128:64:128-n32-S64" + +define void @f(i32 %N, i32* noalias %sums, i32* noalias %escape) { +entry: + br label %for.cond + +for.cond: ; preds = %for.inc7, %entry + %i.0 = phi i32 [ 0, %entry ], [ %inc8, %for.inc7 ] + %exitcond1 = icmp ne i32 %i.0, 1024 + br i1 %exitcond1, label %for.body, label %for.end9 + +for.body: ; preds = %for.cond + br label %for.cond1 + +for.cond1: ; preds = %for.inc, %for.body + %j.0 = phi i32 [ 0, %for.body ], [ %inc, %for.inc ] + %exitcond = icmp ne i32 %j.0, 1024 + br i1 %exitcond, label %for.body3, label %for.end + +for.body3: ; preds = %for.cond1 + %arrayidx = getelementptr inbounds i32* %sums, i32 %i.0 + %tmp = load i32* %arrayidx, align 4 + %add = add nsw i32 %tmp, 5 + store i32 %add, i32* %arrayidx, align 4 + %sub = sub nsw i32 %N, %i.0 + %add5 = add nsw i32 %sub, %j.0 + %arrayidx6 = getelementptr inbounds i32* %escape, i32 %add5 + store i32 %add, i32* %arrayidx6, align 4 + br label %for.inc + +for.inc: ; preds = %for.body3 + %inc = add nsw i32 %j.0, 1 + br label %for.cond1 + +for.end: ; preds = %for.cond1 + br label %for.inc7 + +for.inc7: ; preds = %for.end + %inc8 = add nsw i32 %i.0, 1 + br label %for.cond + +for.end9: ; preds = %for.cond + ret void +} diff --git a/polly/test/ScopInfo/reduction_escaping_intermediate_2.ll b/polly/test/ScopInfo/reduction_escaping_intermediate_2.ll new file mode 100644 index 00000000000..ccf87f17483 --- /dev/null +++ b/polly/test/ScopInfo/reduction_escaping_intermediate_2.ll @@ -0,0 +1,72 @@ +; RUN: opt %loadPolly -basicaa -polly-scops -analyze < %s | FileCheck %s +; +; void f(int N, int * restrict sums, int * restrict escape) { +; int i, j; +; for (i = 0; i < 1024; i++) { +; for (j = 0; j < 1024; j++) { +; sums[i] += 5; +; escape[N-j] = escape[i] + sums[i-1]; +; } +; } +; } +; +; CHECK: Reduction like: 0 +; CHECK: sums +; CHECK: Reduction like: 0 +; CHECK: sums +; CHECK: Reduction like: 0 +; CHECK: escape +; CHECK: Reduction like: 0 +; CHECK: sums +; CHECK: Reduction like: 0 +; CHECK: escape +target datalayout = "e-m:e-p:32:32-i64:64-v128:64:128-n32-S64" + +define void @f(i32 %N, i32* noalias %sums, i32* noalias %escape) { +entry: + br label %for.cond + +for.cond: ; preds = %for.inc10, %entry + %i.0 = phi i32 [ 0, %entry ], [ %inc11, %for.inc10 ] + %exitcond1 = icmp ne i32 %i.0, 1024 + br i1 %exitcond1, label %for.body, label %for.end12 + +for.body: ; preds = %for.cond + br label %for.cond1 + +for.cond1: ; preds = %for.inc, %for.body + %j.0 = phi i32 [ 0, %for.body ], [ %inc, %for.inc ] + %exitcond = icmp ne i32 %j.0, 1024 + br i1 %exitcond, label %for.body3, label %for.end + +for.body3: ; preds = %for.cond1 + %arrayidx = getelementptr inbounds i32* %sums, i32 %i.0 + %tmp = load i32* %arrayidx, align 4 + %add = add nsw i32 %tmp, 5 + store i32 %add, i32* %arrayidx, align 4 + %arrayidx4 = getelementptr inbounds i32* %escape, i32 %i.0 + %tmp2 = load i32* %arrayidx4, align 4 + %sub = add nsw i32 %i.0, -1 + %arrayidx5 = getelementptr inbounds i32* %sums, i32 %sub + %tmp3 = load i32* %arrayidx5, align 4 + %add6 = add nsw i32 %tmp2, %tmp3 + %sub7 = sub nsw i32 %N, %i.0 + %add8 = add nsw i32 %sub7, %j.0 + %arrayidx9 = getelementptr inbounds i32* %escape, i32 %add8 + store i32 %add6, i32* %arrayidx9, align 4 + br label %for.inc + +for.inc: ; preds = %for.body3 + %inc = add nsw i32 %j.0, 1 + br label %for.cond1 + +for.end: ; preds = %for.cond1 + br label %for.inc10 + +for.inc10: ; preds = %for.end + %inc11 = add nsw i32 %i.0, 1 + br label %for.cond + +for.end12: ; preds = %for.cond + ret void +} diff --git a/polly/test/ScopInfo/reduction_invalid_different_operators.ll b/polly/test/ScopInfo/reduction_invalid_different_operators.ll new file mode 100644 index 00000000000..81c02bc6717 --- /dev/null +++ b/polly/test/ScopInfo/reduction_invalid_different_operators.ll @@ -0,0 +1,49 @@ +; RUN: opt %loadPolly -basicaa -polly-scops -analyze < %s | FileCheck %s +; +; int f() { +; int i, sum = 0, sth = 0; +; for (i = 0; i < 1024; i++) { +; sum += 5; +; sth = sth + sth * sth + sth; +; sum *= 5; +; } +; return sum + sth; +; } +; +; CHECK-NOT: Reduction like: 1 +target datalayout = "e-m:e-p:32:32-i64:64-v128:64:128-n32-S64" + +define i32 @f() { +entry: + %sum.0 = alloca i32 + %sth.0 = alloca i32 + br label %entry.split + +entry.split: ; preds = %entry + store i32 0, i32* %sum.0 + store i32 0, i32* %sth.0 + br label %for.cond + +for.cond: ; preds = %for.cond, %entry.split + %i.0 = phi i32 [ 0, %entry.split ], [ %inc, %for.cond ] + %sth.0.reload = load i32* %sth.0 + %sum.0.reload = load i32* %sum.0 + %exitcond = icmp ne i32 %i.0, 1024 + %mul = mul nsw i32 %sth.0.reload, %sth.0.reload + %add1 = add nsw i32 %sth.0.reload, %mul + %tmp = mul i32 %sum.0.reload, 5 + store i32 %tmp, i32* %sum.0 + %sum.1.reload = load i32* %sum.0 + %mul3 = add i32 %sum.1.reload, 25 + %add2 = add nsw i32 %add1, %sth.0.reload + %inc = add nsw i32 %i.0, 1 + store i32 %mul3, i32* %sum.0 + store i32 %add2, i32* %sth.0 + br i1 %exitcond, label %for.cond, label %for.end + +for.end: ; preds = %for.cond + %sum.0.reload.2 = load i32* %sum.0 + %sth.0.reload.2 = load i32* %sth.0 + %add4 = add nsw i32 %sum.0.reload.2, %sth.0.reload.2 + ret i32 %add4 +} diff --git a/polly/test/ScopInfo/reduction_invalid_overlapping_accesses.ll b/polly/test/ScopInfo/reduction_invalid_overlapping_accesses.ll new file mode 100644 index 00000000000..a75e9de9a85 --- /dev/null +++ b/polly/test/ScopInfo/reduction_invalid_overlapping_accesses.ll @@ -0,0 +1,58 @@ +; RUN: opt %loadPolly -polly-scops -analyze < %s | FileCheck %s +; +; void f(int *sums) { +; int i, j; +; for (i = 0; i < 1024; i++) { +; for (j = 0; j < 1024; j++) { +; sums[i] += 5; +; sums[i+10] *= 5; +; } +; } +; } +; +; CHECK-NOT: Reduction like: 1 +target datalayout = "e-m:e-p:32:32-i64:64-v128:64:128-n32-S64" + +define void @f(i32* %sums) { +entry: + br label %for.cond + +for.cond: ; preds = %for.inc6, %entry + %i.0 = phi i32 [ 0, %entry ], [ %inc7, %for.inc6 ] + %exitcond1 = icmp ne i32 %i.0, 1024 + br i1 %exitcond1, label %for.body, label %for.end8 + +for.body: ; preds = %for.cond + br label %for.cond1 + +for.cond1: ; preds = %for.inc, %for.body + %j.0 = phi i32 [ 0, %for.body ], [ %inc, %for.inc ] + %exitcond = icmp ne i32 %j.0, 1024 + br i1 %exitcond, label %for.body3, label %for.end + +for.body3: ; preds = %for.cond1 + %arrayidx = getelementptr inbounds i32* %sums, i32 %i.0 + %tmp = load i32* %arrayidx, align 4 + %add = add nsw i32 %tmp, 5 + store i32 %add, i32* %arrayidx, align 4 + %add4 = add nsw i32 %i.0, 10 + %arrayidx5 = getelementptr inbounds i32* %sums, i32 %add4 + %tmp2 = load i32* %arrayidx5, align 4 + %mul = mul nsw i32 %tmp2, 5 + store i32 %mul, i32* %arrayidx5, align 4 + br label %for.inc + +for.inc: ; preds = %for.body3 + %inc = add nsw i32 %j.0, 1 + br label %for.cond1 + +for.end: ; preds = %for.cond1 + br label %for.inc6 + +for.inc6: ; preds = %for.end + %inc7 = add nsw i32 %i.0, 1 + br label %for.cond + +for.end8: ; preds = %for.cond + ret void +} diff --git a/polly/test/ScopInfo/reduction_multiple_loops_array_sum.ll b/polly/test/ScopInfo/reduction_multiple_loops_array_sum.ll new file mode 100644 index 00000000000..1f7013dd6d5 --- /dev/null +++ b/polly/test/ScopInfo/reduction_multiple_loops_array_sum.ll @@ -0,0 +1,78 @@ +; RUN: opt -basicaa %loadPolly -polly-scops -analyze < %s | FileCheck %s +; +; CHECK: Stmt_for_body +; CHECK: Reduction like: 1 +; CHECK: MemRef_sum +; CHECK: Reduction like: 1 +; CHECK: MemRef_sum +; CHECK: Stmt_for_body3 +; CHECK: Reduction like: 0 +; CHECK: MemRef_A +; CHECK: Reduction like: 1 +; CHECK: MemRef_sum +; CHECK: Reduction like: 1 +; CHECK: MemRef_sum +; CHECK: Stmt_for_end +; CHECK: Reduction like: 1 +; CHECK: MemRef_sum +; CHECK: Reduction like: 1 +; CHECK: MemRef_sum +; +; void f(int *restrict A, int *restrict sum) { +; int i, j; +; for (i = 0; i < 100; i++) { +; *sum *= 7; +; for (j = 0; j < 100; j++) { +; *sum += A[i + j]; +; } +; *sum *= 5; +; } +; } +target datalayout = "e-m:e-p:32:32-i64:64-v128:64:128-n32-S64" + +define void @f(i32* noalias %A, i32* noalias %sum) { +entry: + br label %for.cond + +for.cond: ; preds = %for.inc6, %entry + %i.0 = phi i32 [ 0, %entry ], [ %inc7, %for.inc6 ] + %exitcond1 = icmp ne i32 %i.0, 100 + br i1 %exitcond1, label %for.body, label %for.end8 + +for.body: ; preds = %for.cond + %tmp = load i32* %sum, align 4 + %mul = mul nsw i32 %tmp, 7 + store i32 %mul, i32* %sum, align 4 + br label %for.cond1 + +for.cond1: ; preds = %for.inc, %for.body + %j.0 = phi i32 [ 0, %for.body ], [ %inc, %for.inc ] + %exitcond = icmp ne i32 %j.0, 100 + br i1 %exitcond, label %for.body3, label %for.end + +for.body3: ; preds = %for.cond1 + %add = add nsw i32 %i.0, %j.0 + %arrayidx = getelementptr inbounds i32* %A, i32 %add + %tmp2 = load i32* %arrayidx, align 4 + %tmp3 = load i32* %sum, align 4 + %add4 = add nsw i32 %tmp3, %tmp2 + store i32 %add4, i32* %sum, align 4 + br label %for.inc + +for.inc: ; preds = %for.body3 + %inc = add nsw i32 %j.0, 1 + br label %for.cond1 + +for.end: ; preds = %for.cond1 + %tmp4 = load i32* %sum, align 4 + %mul5 = mul nsw i32 %tmp4, 5 + store i32 %mul5, i32* %sum, align 4 + br label %for.inc6 + +for.inc6: ; preds = %for.end + %inc7 = add nsw i32 %i.0, 1 + br label %for.cond + +for.end8: ; preds = %for.cond + ret void +} diff --git a/polly/test/ScopInfo/reduction_multiple_loops_array_sum_1.ll b/polly/test/ScopInfo/reduction_multiple_loops_array_sum_1.ll new file mode 100644 index 00000000000..5148492e1e4 --- /dev/null +++ b/polly/test/ScopInfo/reduction_multiple_loops_array_sum_1.ll @@ -0,0 +1,72 @@ +; RUN: opt -basicaa %loadPolly -polly-scops -analyze < %s | FileCheck %s +; +; CHECK: Stmt_for_body +; CHECK: Reduction like: 0 +; CHECK: MemRef_sum_04 +; CHECK: Reduction like: 0 +; CHECK: MemRef_sum_12 +; CHECK: Stmt_for_inc +; CHECK: Reduction like: 1 +; CHECK: MemRef_sum_12 +; CHECK: Reduction like: 0 +; CHECK: MemRef_A +; CHECK: Reduction like: 1 +; CHECK: MemRef_sum_12 +; CHECK: Stmt_for_inc5 +; CHECK: Reduction like: 0 +; CHECK: MemRef_sum_12 +; CHECK: Reduction like: 0 +; CHECK: MemRef_sum_04 +; +; int f(int * __restrict__ A) { +; int i, j, sum = 1; +; for (i = 0; i < 100; i++) { +; sum *= 7; +; for (j = 0; j < 100; j++) { +; sum += A[i+j]; +; } +; } +; return sum; +; } +target datalayout = "e-m:e-i64:64-f80:128-n8:16:32:64-S128" + +define i32 @f(i32* noalias %A) { +entry: + %sum.04.reg2mem = alloca i32 + %sum.12.reg2mem = alloca i32 + br label %entry.split + +entry.split: ; preds = %entry + store i32 0, i32* %sum.04.reg2mem + br label %for.body + +for.body: ; preds = %for.inc5, %entry.split + %indvars.iv23 = phi i64 [ 0, %entry.split ], [ %3, %for.inc5 ] + %sum.04.reload = load i32* %sum.04.reg2mem + %mul = mul nsw i32 %sum.04.reload, 7 + store i32 %mul, i32* %sum.12.reg2mem + br label %for.inc + +for.inc: ; preds = %for.inc, %for.body + %indvars.iv1 = phi i64 [ 0, %for.body ], [ %1, %for.inc ] + %sum.12.reload = load i32* %sum.12.reg2mem + %0 = add i64 %indvars.iv23, %indvars.iv1 + %arrayidx = getelementptr i32* %A, i64 %0 + %tmp5 = load i32* %arrayidx, align 4 + %add4 = add nsw i32 %tmp5, %sum.12.reload + %1 = add nuw nsw i64 %indvars.iv1, 1 + %exitcond1 = icmp eq i64 %1, 100 + store i32 %add4, i32* %sum.12.reg2mem + br i1 %exitcond1, label %for.inc5, label %for.inc + +for.inc5: ; preds = %for.inc + %2 = load i32* %sum.12.reg2mem + %3 = add nuw nsw i64 %indvars.iv23, 1 + %exitcond2 = icmp eq i64 %3, 100 + store i32 %2, i32* %sum.04.reg2mem + br i1 %exitcond2, label %for.end7, label %for.body + +for.end7: ; preds = %for.inc5 + %4 = load i32* %sum.04.reg2mem + ret i32 %4 +} diff --git a/polly/test/ScopInfo/reduction_multiple_simple_binary.ll b/polly/test/ScopInfo/reduction_multiple_simple_binary.ll new file mode 100644 index 00000000000..52ddcd15a27 --- /dev/null +++ b/polly/test/ScopInfo/reduction_multiple_simple_binary.ll @@ -0,0 +1,98 @@ +; RUN: opt -basicaa %loadPolly -polly-scops -analyze < %s | FileCheck %s +; +; CHECK: ReadAccess := [Reduction like: 0] +; CHECK: { Stmt_for_body[i0] -> MemRef_A[1 + i0] }; +; CHECK: ReadAccess := [Reduction like: 0] +; CHECK: { Stmt_for_body[i0] -> MemRef_A[i0] }; +; CHECK: MustWriteAccess := [Reduction like: 0] +; CHECK: { Stmt_for_body[i0] -> MemRef_first[0] }; +; CHECK: ReadAccess := [Reduction like: 1] +; CHECK: { Stmt_for_body[i0] -> MemRef_sum[0] }; +; CHECK: MustWriteAccess := [Reduction like: 1] +; CHECK: { Stmt_for_body[i0] -> MemRef_sum[0] }; +; CHECK: ReadAccess := [Reduction like: 0] +; CHECK: { Stmt_for_body[i0] -> MemRef_A[-1 + i0] }; +; CHECK: ReadAccess := [Reduction like: 0] +; CHECK: { Stmt_for_body[i0] -> MemRef_A[i0] }; +; CHECK: MustWriteAccess := [Reduction like: 0] +; CHECK: { Stmt_for_body[i0] -> MemRef_middle[0] }; +; CHECK: ReadAccess := [Reduction like: 1] +; CHECK: { Stmt_for_body[i0] -> MemRef_prod[0] }; +; CHECK: MustWriteAccess := [Reduction like: 1] +; CHECK: { Stmt_for_body[i0] -> MemRef_prod[0] }; +; CHECK: ReadAccess := [Reduction like: 0] +; CHECK: { Stmt_for_body[i0] -> MemRef_A[-1 + i0] }; +; CHECK: ReadAccess := [Reduction like: 0] +; CHECK: { Stmt_for_body[i0] -> MemRef_A[1 + i0] }; +; CHECK: MustWriteAccess := [Reduction like: 0] +; CHECK: { Stmt_for_body[i0] -> MemRef_last[0] }; +; +; int first, sum, middle, prod, last; +; +; void f(int * restrict A) { +; int i; +; for (int i = 0; i < 100; i++) { +; first = A[i+1] + A[i]; +; sum += i * 3; +; middle = A[i-1] + A[i]; +; prod *= (i + 3); +; last = A[i-1] + A[i+1]; +; } +; } +target datalayout = "e-m:e-p:32:32-i64:64-v128:64:128-n32-S64" + +@first = common global i32 0, align 4 +@sum = common global i32 0, align 4 +@middle = common global i32 0, align 4 +@prod = common global i32 0, align 4 +@last = common global i32 0, align 4 + +define void @f(i32* noalias %A) { +entry: + br label %for.cond + +for.cond: ; preds = %for.inc, %entry + %i1.0 = phi i32 [ 0, %entry ], [ %inc, %for.inc ] + %exitcond = icmp ne i32 %i1.0, 100 + br i1 %exitcond, label %for.body, label %for.end + +for.body: ; preds = %for.cond + %add = add nsw i32 %i1.0, 1 + %arrayidx = getelementptr inbounds i32* %A, i32 %add + %tmp = load i32* %arrayidx, align 4 + %arrayidx2 = getelementptr inbounds i32* %A, i32 %i1.0 + %tmp1 = load i32* %arrayidx2, align 4 + %add3 = add nsw i32 %tmp, %tmp1 + store i32 %add3, i32* @first, align 4 + %mul = mul nsw i32 %i1.0, 3 + %tmp2 = load i32* @sum, align 4 + %add4 = add nsw i32 %tmp2, %mul + store i32 %add4, i32* @sum, align 4 + %sub = add nsw i32 %i1.0, -1 + %arrayidx5 = getelementptr inbounds i32* %A, i32 %sub + %tmp3 = load i32* %arrayidx5, align 4 + %arrayidx6 = getelementptr inbounds i32* %A, i32 %i1.0 + %tmp4 = load i32* %arrayidx6, align 4 + %add7 = add nsw i32 %tmp3, %tmp4 + store i32 %add7, i32* @middle, align 4 + %add8 = add nsw i32 %i1.0, 3 + %tmp5 = load i32* @prod, align 4 + %mul9 = mul nsw i32 %tmp5, %add8 + store i32 %mul9, i32* @prod, align 4 + %sub10 = add nsw i32 %i1.0, -1 + %arrayidx11 = getelementptr inbounds i32* %A, i32 %sub10 + %tmp6 = load i32* %arrayidx11, align 4 + %add12 = add nsw i32 %i1.0, 1 + %arrayidx13 = getelementptr inbounds i32* %A, i32 %add12 + %tmp7 = load i32* %arrayidx13, align 4 + %add14 = add nsw i32 %tmp6, %tmp7 + store i32 %add14, i32* @last, align 4 + br label %for.inc + +for.inc: ; preds = %for.body + %inc = add nsw i32 %i1.0, 1 + br label %for.cond + +for.end: ; preds = %for.cond + ret void +} diff --git a/polly/test/ScopInfo/reduction_non_overlapping_chains.ll b/polly/test/ScopInfo/reduction_non_overlapping_chains.ll new file mode 100644 index 00000000000..7cd1ba7e97d --- /dev/null +++ b/polly/test/ScopInfo/reduction_non_overlapping_chains.ll @@ -0,0 +1,60 @@ +; RUN: opt %loadPolly -polly-scops -analyze < %s | FileCheck %s +; +; CHECK: Reduction like: 1 +; CHECK: Reduction like: 1 +; CHECK: Reduction like: 1 +; CHECK: Reduction like: 1 +; +; void f(int *sums) { +; for (int i = 0; i < 1024; i++) { +; for (int j = 0; j < 1024; j++) { +; sums[i] += 5; +; sums[i+1024] *= 5; +; } +; } +; } +target datalayout = "e-m:e-p:32:32-i64:64-v128:64:128-n32-S64" + +define void @f(i32* %sums) { +entry: + br label %for.cond + +for.cond: ; preds = %for.inc6, %entry + %i.0 = phi i32 [ 0, %entry ], [ %inc7, %for.inc6 ] + %exitcond1 = icmp ne i32 %i.0, 1024 + br i1 %exitcond1, label %for.body, label %for.end8 + +for.body: ; preds = %for.cond + br label %for.cond1 + +for.cond1: ; preds = %for.inc, %for.body + %j.0 = phi i32 [ 0, %for.body ], [ %inc, %for.inc ] + %exitcond = icmp ne i32 %j.0, 1024 + br i1 %exitcond, label %for.body3, label %for.end + +for.body3: ; preds = %for.cond1 + %arrayidx = getelementptr inbounds i32* %sums, i32 %i.0 + %tmp = load i32* %arrayidx, align 4 + %add = add nsw i32 %tmp, 5 + store i32 %add, i32* %arrayidx, align 4 + %add4 = add nsw i32 %i.0, 1024 + %arrayidx5 = getelementptr inbounds i32* %sums, i32 %add4 + %tmp2 = load i32* %arrayidx5, align 4 + %mul = mul nsw i32 %tmp2, 5 + store i32 %mul, i32* %arrayidx5, align 4 + br label %for.inc + +for.inc: ; preds = %for.body3 + %inc = add nsw i32 %j.0, 1 + br label %for.cond1 + +for.end: ; preds = %for.cond1 + br label %for.inc6 + +for.inc6: ; preds = %for.end + %inc7 = add nsw i32 %i.0, 1 + br label %for.cond + +for.end8: ; preds = %for.cond + ret void +} diff --git a/polly/test/ScopInfo/reduction_only_reduction_like_access.ll b/polly/test/ScopInfo/reduction_only_reduction_like_access.ll index 54c9fde75e1..4eb428ff7e2 100644 --- a/polly/test/ScopInfo/reduction_only_reduction_like_access.ll +++ b/polly/test/ScopInfo/reduction_only_reduction_like_access.ll @@ -1,6 +1,6 @@ ; RUN: opt %loadPolly -polly-scops -analyze < %s | FileCheck %s ; -; CHECK: Reduction like: 0 +; CHECK: Reduction like: 1 ; ; void f(int *sum) { ; for (int i = 0; i < 100; i++) diff --git a/polly/test/ScopInfo/reduction_two_identical_reads.ll b/polly/test/ScopInfo/reduction_two_identical_reads.ll new file mode 100644 index 00000000000..160441bf06d --- /dev/null +++ b/polly/test/ScopInfo/reduction_two_identical_reads.ll @@ -0,0 +1,66 @@ +; RUN: opt %loadPolly -polly-scops -analyze < %s | FileCheck %s +; +; CHECK-NOT: Reduction like: 1 +; +; Check that we do not mark these accesses as reduction like. +; We do this for the case the loads are modelt with the same LLVM-IR value and +; for the case there are different LLVM-IR values. +; +; void f(int *A) { +; for (int i = 0; i < 1024; i++) +; A[i] = A[i] + A[i]; +; } +; +target datalayout = "e-m:e-p:32:32-i64:64-v128:64:128-n32-S64" + +define void @f_one_load_case(i32* %A) { +entry: + br label %for.cond + +for.cond: ; preds = %for.inc, %entry + %i.0 = phi i32 [ 0, %entry ], [ %inc, %for.inc ] + %exitcond = icmp ne i32 %i.0, 1024 + br i1 %exitcond, label %for.body, label %for.end + +for.body: ; preds = %for.cond + %arrayidx = getelementptr inbounds i32* %A, i32 %i.0 + %tmp = load i32* %arrayidx, align 4 + %add = add nsw i32 %tmp, %tmp + %arrayidx2 = getelementptr inbounds i32* %A, i32 %i.0 + store i32 %add, i32* %arrayidx2, align 4 + br label %for.inc + +for.inc: ; preds = %for.body + %inc = add nsw i32 %i.0, 1 + br label %for.cond + +for.end: ; preds = %for.cond + ret void +} + +define void @f_two_loads_case(i32* %A) { +entry: + br label %for.cond + +for.cond: ; preds = %for.inc, %entry + %i.0 = phi i32 [ 0, %entry ], [ %inc, %for.inc ] + %exitcond = icmp ne i32 %i.0, 1024 + br i1 %exitcond, label %for.body, label %for.end + +for.body: ; preds = %for.cond + %arrayidx = getelementptr inbounds i32* %A, i32 %i.0 + %tmp = load i32* %arrayidx, align 4 + %arrayidxCopy = getelementptr inbounds i32* %A, i32 %i.0 + %tmpCopy = load i32* %arrayidxCopy, align 4 + %add = add nsw i32 %tmp, %tmpCopy + %arrayidx2 = getelementptr inbounds i32* %A, i32 %i.0 + store i32 %add, i32* %arrayidx2, align 4 + br label %for.inc + +for.inc: ; preds = %for.body + %inc = add nsw i32 %i.0, 1 + br label %for.cond + +for.end: ; preds = %for.cond + ret void +} |

