summaryrefslogtreecommitdiffstats
diff options
context:
space:
mode:
-rw-r--r--polly/include/polly/ScopInfo.h43
-rw-r--r--polly/lib/Analysis/ScopInfo.cpp125
-rw-r--r--polly/lib/CodeGen/IslNodeBuilder.cpp18
-rw-r--r--polly/test/Isl/CodeGen/OpenMP/invariant_base_pointers_preloaded.ll35
-rw-r--r--polly/test/Isl/CodeGen/invariant_load_outermost.ll37
-rw-r--r--polly/test/Isl/CodeGen/whole-scop-non-affine-subregion.ll16
-rw-r--r--polly/test/ScopInfo/intra_and_inter_bb_scalar_dep.ll4
-rw-r--r--polly/test/ScopInfo/invariant_loads_complicated_dependences.ll16
-rw-r--r--polly/test/ScopInfo/invariant_loop_bounds.ll20
-rw-r--r--polly/test/ScopInfo/invariant_same_loop_bound_multiple_times-1.ll106
-rw-r--r--polly/test/ScopInfo/invariant_same_loop_bound_multiple_times-2.ll109
-rw-r--r--polly/test/ScopInfo/required-invariant-loop-bounds.ll4
12 files changed, 480 insertions, 53 deletions
diff --git a/polly/include/polly/ScopInfo.h b/polly/include/polly/ScopInfo.h
index 43371d5d20e..f8cdc536acb 100644
--- a/polly/include/polly/ScopInfo.h
+++ b/polly/include/polly/ScopInfo.h
@@ -680,8 +680,14 @@ using MemoryAccessList = std::forward_list<MemoryAccess *>;
/// @brief Type for invariant memory accesses and their domain context.
using InvariantAccessTy = std::pair<MemoryAccess *, isl_set *>;
+/// @brief Type for an ordered list of invariant accesses.
+using InvariantAccessListTy = std::forward_list<InvariantAccessTy>;
+
+/// @brief Type for a class of equivalent invariant memory accesses.
+using InvariantEquivClassTy = std::pair<const SCEV *, InvariantAccessListTy>;
+
/// @brief Type for multiple invariant memory accesses and their domain context.
-using InvariantAccessesTy = SmallVector<InvariantAccessTy, 8>;
+using InvariantAccessesTy = SmallVector<InvariantEquivClassTy, 8>;
///===----------------------------------------------------------------------===//
/// @brief Statement of the Scop
@@ -906,12 +912,12 @@ public:
/// @brief Add @p Access to this statement's list of accesses.
void addAccess(MemoryAccess *Access);
- /// @brief Move the memory access in @p InvMAs to @p TargetList.
+ /// @brief Move the memory access in @p InvMAs to @p InvariantEquivClasses.
///
/// Note that scalar accesses that are caused by any access in @p InvMAs will
/// be eliminated too.
void hoistMemoryAccesses(MemoryAccessList &InvMAs,
- InvariantAccessesTy &TargetList);
+ InvariantAccessesTy &InvariantEquivClasses);
typedef MemoryAccessVec::iterator iterator;
typedef MemoryAccessVec::const_iterator const_iterator;
@@ -1135,7 +1141,7 @@ private:
MinMaxVectorPairVectorTy MinMaxAliasGroups;
/// @brief List of invariant accesses.
- InvariantAccessesTy InvariantAccesses;
+ InvariantAccessesTy InvariantEquivClasses;
/// @brief Scop constructor; invoked from ScopInfo::buildScop.
Scop(Region &R, AccFuncMapType &AccFuncMap, ScopDetection &SD,
@@ -1186,6 +1192,20 @@ private:
/// @see isIgnored()
void simplifySCoP(bool RemoveIgnoredStmts);
+ /// @brief Create equivalence classes for required invariant accesses.
+ ///
+ /// These classes will consolidate multiple required invariant loads from the
+ /// same address in order to keep the number of dimensions in the SCoP
+/// description small. For each such equivalence class only one
+ /// representing element, hence one required invariant load, will be chosen
+ /// and modeled as parameter. The method
+ /// Scop::getRepresentingInvariantLoadSCEV() will replace each element from an
+ /// equivalence class with the representing element that is modeled. As a
+ /// consequence Scop::getIdForParam() will only return an id for the
+ /// representing element of each equivalence class, thus for each required
+ /// invariant location.
+ void buildInvariantEquivalenceClasses();
+
/// @brief Hoist invariant memory loads and check for required ones.
///
/// We first identify "common" invariant loads, thus loads that are invariant
@@ -1220,6 +1240,19 @@ private:
/// @brief Simplify the assumed and boundary context.
void simplifyContexts();
+ /// @brief Get the representing SCEV for @p S if applicable, otherwise @p S.
+ ///
+ /// Invariant loads of the same location are put in an equivalence class and
+ /// only one of them is chosen as a representing element that will be
+ /// modeled as a parameter. The others have to be normalized, i.e.,
+ /// replaced by the representing element of their equivalence class, in order
+ /// to get the correct parameter value, e.g., in the SCEVAffinator.
+ ///
+ /// @param S The SCEV to normalize.
+ ///
+ /// @return The representing SCEV for invariant loads or @p S if none.
+ const SCEV *getRepresentingInvariantLoadSCEV(const SCEV *S) const;
+
/// @brief Create a new SCoP statement for either @p BB or @p R.
///
/// Either @p BB or @p R should be non-null. A new statement for the non-null
@@ -1340,7 +1373,7 @@ public:
/// @brief Return the set of invariant accesses.
const InvariantAccessesTy &getInvariantAccesses() const {
- return InvariantAccesses;
+ return InvariantEquivClasses;
}
/// @brief Mark the SCoP as optimized by the scheduler.
diff --git a/polly/lib/Analysis/ScopInfo.cpp b/polly/lib/Analysis/ScopInfo.cpp
index 0a6394d1256..34f5114feaa 100644
--- a/polly/lib/Analysis/ScopInfo.cpp
+++ b/polly/lib/Analysis/ScopInfo.cpp
@@ -1356,7 +1356,7 @@ void ScopStmt::print(raw_ostream &OS) const {
void ScopStmt::dump() const { print(dbgs()); }
void ScopStmt::hoistMemoryAccesses(MemoryAccessList &InvMAs,
- InvariantAccessesTy &TargetList) {
+ InvariantAccessesTy &InvariantEquivClasses) {
// Remove all memory accesses in @p InvMAs from this statement together
// with all scalar accesses that were caused by them. The tricky iteration
@@ -1409,8 +1409,49 @@ void ScopStmt::hoistMemoryAccesses(MemoryAccessList &InvMAs,
}
}
- for (MemoryAccess *MA : InvMAs)
- TargetList.push_back(std::make_pair(MA, isl_set_copy(DomainCtx)));
+ for (MemoryAccess *MA : InvMAs) {
+
+ // Check for another invariant access that accesses the same location as
+ // MA and if found consolidate them. Otherwise create a new equivalence
+ // class at the end of InvariantEquivClasses.
+ LoadInst *LInst = cast<LoadInst>(MA->getAccessInstruction());
+ const SCEV *PointerSCEV = SE.getSCEV(LInst->getPointerOperand());
+ bool Consolidated = false;
+
+ for (auto &IAClass : InvariantEquivClasses) {
+ const SCEV *ClassPointerSCEV = IAClass.first;
+ if (PointerSCEV != ClassPointerSCEV)
+ continue;
+
+ Consolidated = true;
+
+ // We created empty equivalence classes for required invariant loads
+ // in the beginning and might encounter one of them here. If so, this
+ // MA will be the first in that equivalence class.
+ auto &ClassList = IAClass.second;
+ if (ClassList.empty()) {
+ ClassList.push_front(std::make_pair(MA, isl_set_copy(DomainCtx)));
+ break;
+ }
+
+ // If the equivalence class for MA is not empty we unify the execution
+ // context and add MA to the list of accesses that are in this class.
+ isl_set *IAClassDomainCtx = IAClass.second.front().second;
+ IAClassDomainCtx =
+ isl_set_union(IAClassDomainCtx, isl_set_copy(DomainCtx));
+ ClassList.push_front(std::make_pair(MA, IAClassDomainCtx));
+ break;
+ }
+
+ if (Consolidated)
+ continue;
+
+ // If we did not consolidate MA, thus did not find an equivalence class
+    // for it, we create a new one.
+ InvariantAccessTy IA = std::make_pair(MA, isl_set_copy(DomainCtx));
+ InvariantEquivClasses.emplace_back(InvariantEquivClassTy(
+ std::make_pair(PointerSCEV, InvariantAccessListTy({IA}))));
+ }
isl_set_free(DomainCtx);
}
@@ -1424,9 +1465,34 @@ void Scop::setContext(__isl_take isl_set *NewContext) {
Context = NewContext;
}
+const SCEV *Scop::getRepresentingInvariantLoadSCEV(const SCEV *S) const {
+ const SCEVUnknown *SU = dyn_cast_or_null<SCEVUnknown>(S);
+ if (!SU)
+ return S;
+
+ LoadInst *LInst = dyn_cast<LoadInst>(SU->getValue());
+ if (!LInst)
+ return S;
+
+ // Try to find an equivalence class for the load, if found return
+ // the SCEV for the representing element, otherwise return S.
+ const SCEV *PointerSCEV = SE->getSCEV(LInst->getPointerOperand());
+ for (const InvariantEquivClassTy &IAClass : InvariantEquivClasses) {
+ const SCEV *ClassPointerSCEV = IAClass.first;
+ if (ClassPointerSCEV == PointerSCEV)
+ return ClassPointerSCEV;
+ }
+
+ return S;
+}
+
void Scop::addParams(std::vector<const SCEV *> NewParameters) {
for (const SCEV *Parameter : NewParameters) {
Parameter = extractConstantFactor(Parameter, *SE).second;
+
+ // Normalize the SCEV to get the representing element for an invariant load.
+ Parameter = getRepresentingInvariantLoadSCEV(Parameter);
+
if (ParameterIds.find(Parameter) != ParameterIds.end())
continue;
@@ -1438,6 +1504,9 @@ void Scop::addParams(std::vector<const SCEV *> NewParameters) {
}
__isl_give isl_id *Scop::getIdForParam(const SCEV *Parameter) const {
+ // Normalize the SCEV to get the representing element for an invariant load.
+ Parameter = getRepresentingInvariantLoadSCEV(Parameter);
+
ParamIdType::const_iterator IdIter = ParameterIds.find(Parameter);
if (IdIter == ParameterIds.end())
@@ -1515,6 +1584,21 @@ void Scop::addUserContext() {
isl_space_free(Space);
}
+void Scop::buildInvariantEquivalenceClasses() {
+ const InvariantLoadsSetTy &RIL = *SD.getRequiredInvariantLoads(&getRegion());
+ SmallPtrSet<const SCEV *, 4> ClassPointerSet;
+ for (LoadInst *LInst : RIL) {
+ const SCEV *PointerSCEV = SE->getSCEV(LInst->getPointerOperand());
+
+    // Skip the load if we already have an equivalence class for the pointer.
+ if (!ClassPointerSet.insert(PointerSCEV).second)
+ continue;
+
+ InvariantEquivClasses.emplace_back(InvariantEquivClassTy(
+ std::make_pair(PointerSCEV, InvariantAccessListTy())));
+ }
+}
+
void Scop::buildContext() {
isl_space *Space = isl_space_params_alloc(IslCtx, 0);
Context = isl_set_universe(isl_space_copy(Space));
@@ -2337,6 +2421,8 @@ Scop::Scop(Region &R, AccFuncMapType &AccFuncMap, ScopDetection &SD,
void Scop::init(AliasAnalysis &AA) {
buildContext();
+ buildInvariantEquivalenceClasses();
+
buildDomains(&R);
// Remove empty and ignored statements.
@@ -2388,8 +2474,9 @@ Scop::~Scop() {
}
}
- for (const auto &IA : InvariantAccesses)
- isl_set_free(IA.second);
+ for (const auto &IAClass : InvariantEquivClasses)
+ if (!IAClass.second.empty())
+ isl_set_free(IAClass.second.front().second);
}
void Scop::updateAccessDimensionality() {
@@ -2478,17 +2565,18 @@ void Scop::hoistInvariantLoads() {
InvMAs.reverse();
// Transfer the memory access from the statement to the SCoP.
- Stmt.hoistMemoryAccesses(InvMAs, InvariantAccesses);
+ Stmt.hoistMemoryAccesses(InvMAs, InvariantEquivClasses);
isl_set_free(Domain);
}
isl_union_map_free(Writes);
- if (!InvariantAccesses.empty())
+ if (!InvariantEquivClasses.empty())
IsOptimized = true;
+ auto &ScopRIL = *SD.getRequiredInvariantLoads(&getRegion());
// Check required invariant loads that were tagged during SCoP detection.
- for (LoadInst *LI : *SD.getRequiredInvariantLoads(&getRegion())) {
+ for (LoadInst *LI : ScopRIL) {
assert(LI && getRegion().contains(LI));
ScopStmt *Stmt = getStmtForBasicBlock(LI->getParent());
if (Stmt && Stmt->lookupAccessesFor(LI) != nullptr) {
@@ -2511,8 +2599,12 @@ void Scop::hoistInvariantLoads() {
// we already ordered the accesses such that indirect loads can be resolved,
// thus we use a stable sort here.
- auto compareInvariantAccesses = [this](const InvariantAccessTy &IA0,
- const InvariantAccessTy &IA1) {
+ auto compareInvariantAccesses = [this](
+ const InvariantEquivClassTy &IAClass0,
+ const InvariantEquivClassTy &IAClass1) {
+ const InvariantAccessTy &IA0 = IAClass0.second.front();
+ const InvariantAccessTy &IA1 = IAClass1.second.front();
+
Instruction *AI0 = IA0.first->getAccessInstruction();
Instruction *AI1 = IA1.first->getAccessInstruction();
@@ -2554,7 +2646,7 @@ void Scop::hoistInvariantLoads() {
return Involves1Id0;
};
- std::stable_sort(InvariantAccesses.begin(), InvariantAccesses.end(),
+ std::stable_sort(InvariantEquivClasses.begin(), InvariantEquivClasses.end(),
compareInvariantAccesses);
}
@@ -2739,9 +2831,14 @@ void Scop::print(raw_ostream &OS) const {
OS.indent(4) << "Region: " << getNameStr() << "\n";
OS.indent(4) << "Max Loop Depth: " << getMaxLoopDepth() << "\n";
OS.indent(4) << "Invariant Accesses: {\n";
- for (const auto &IA : InvariantAccesses) {
- IA.first->print(OS);
- OS.indent(12) << "Execution Context: " << IA.second << "\n";
+ for (const auto &IAClass : InvariantEquivClasses) {
+ if (IAClass.second.empty()) {
+ OS.indent(12) << "Class Pointer: " << IAClass.first << "\n";
+ } else {
+ IAClass.second.front().first->print(OS);
+ OS.indent(12) << "Execution Context: " << IAClass.second.front().second
+ << "\n";
+ }
}
OS.indent(4) << "}\n";
printContext(OS.indent(4));
diff --git a/polly/lib/CodeGen/IslNodeBuilder.cpp b/polly/lib/CodeGen/IslNodeBuilder.cpp
index f5ee8c9abfc..086ff7e91d8 100644
--- a/polly/lib/CodeGen/IslNodeBuilder.cpp
+++ b/polly/lib/CodeGen/IslNodeBuilder.cpp
@@ -906,8 +906,8 @@ Value *IslNodeBuilder::preloadInvariantLoad(const MemoryAccess &MA,
void IslNodeBuilder::preloadInvariantLoads() {
- const auto &InvAccList = S.getInvariantAccesses();
- if (InvAccList.empty())
+ const auto &InvariantEquivClasses = S.getInvariantAccesses();
+ if (InvariantEquivClasses.empty())
return;
const Region &R = S.getRegion();
@@ -921,14 +921,20 @@ void IslNodeBuilder::preloadInvariantLoads() {
isl_ast_build *Build =
isl_ast_build_from_context(isl_set_universe(S.getParamSpace()));
- for (const auto &IA : InvAccList) {
- MemoryAccess *MA = IA.first;
+ // For each equivalence class of invariant loads we pre-load the representing
+ // element with the unified execution context. However, we have to map all
+ // elements of the class to the one preloaded load as they are referenced
+  // during the code generation and therefore need to be mapped.
+ for (const auto &IAClass : InvariantEquivClasses) {
+
+ MemoryAccess *MA = IAClass.second.front().first;
assert(!MA->isImplicit());
- isl_set *Domain = isl_set_copy(IA.second);
+ isl_set *Domain = isl_set_copy(IAClass.second.front().second);
Instruction *AccInst = MA->getAccessInstruction();
Value *PreloadVal = preloadInvariantLoad(*MA, Domain, Build);
- ValueMap[AccInst] = PreloadVal;
+ for (const InvariantAccessTy &IA : IAClass.second)
+ ValueMap[IA.first->getAccessInstruction()] = PreloadVal;
if (SE.isSCEVable(AccInst->getType())) {
isl_id *ParamId = S.getIdForParam(SE.getSCEV(AccInst));
diff --git a/polly/test/Isl/CodeGen/OpenMP/invariant_base_pointers_preloaded.ll b/polly/test/Isl/CodeGen/OpenMP/invariant_base_pointers_preloaded.ll
new file mode 100644
index 00000000000..cbebd60ce43
--- /dev/null
+++ b/polly/test/Isl/CodeGen/OpenMP/invariant_base_pointers_preloaded.ll
@@ -0,0 +1,35 @@
+; RUN: opt %loadPolly -polly-codegen -polly-parallel \
+; RUN: -polly-parallel-force -S < %s | FileCheck %s
+;
+; Test to verify that we hand down the preloaded A[0] to the OpenMP subfunction.
+;
+; void f(float *A) {
+; for (int i = 1; i < 1000; i++)
+; A[i] += A[0] + A[0];
+; }
+;
+; CHECK: %polly.subfn.storeaddr.polly.access.A.load = getelementptr inbounds
+; CHECK: store float %polly.access.A.load, float* %polly.subfn.storeaddr.polly.access.A.load
+;
+target datalayout = "e-m:e-i64:64-f80:128-n8:16:32:64-S128"
+
+define void @f(float* nocapture %A) {
+entry:
+ br label %for.body
+
+for.cond.cleanup: ; preds = %for.body
+ ret void
+
+for.body: ; preds = %for.body, %entry
+ %indvars.iv = phi i64 [ 1, %entry ], [ %indvars.iv.next, %for.body ]
+ %tmp = load float, float* %A, align 4
+ %tmp2 = load float, float* %A, align 4
+ %tmpadd = fadd float %tmp, %tmp2
+ %arrayidx1 = getelementptr inbounds float, float* %A, i64 %indvars.iv
+ %tmp1 = load float, float* %arrayidx1, align 4
+ %add = fadd float %tmp2, %tmp1
+ store float %add, float* %arrayidx1, align 4
+ %indvars.iv.next = add nuw nsw i64 %indvars.iv, 1
+ %exitcond = icmp eq i64 %indvars.iv.next, 1000
+ br i1 %exitcond, label %for.cond.cleanup, label %for.body
+}
diff --git a/polly/test/Isl/CodeGen/invariant_load_outermost.ll b/polly/test/Isl/CodeGen/invariant_load_outermost.ll
new file mode 100644
index 00000000000..49623fd7824
--- /dev/null
+++ b/polly/test/Isl/CodeGen/invariant_load_outermost.ll
@@ -0,0 +1,37 @@
+; RUN: opt %loadPolly -polly-codegen -S < %s | FileCheck %s
+
+; CHECK: polly.start
+
+; void f(int *A) {
+; if (*A > 42)
+; *A = *A + 1;
+; else
+; *A = *A - 1;
+; }
+;
+target datalayout = "e-m:e-i64:64-f80:128-n8:16:32:64-S128"
+
+define void @f(i32* %A) {
+entry:
+ br label %entry.split
+
+entry.split:
+ %tmp = load i32, i32* %A, align 4
+ %cmp = icmp sgt i32 %tmp, 42
+ br i1 %cmp, label %if.then, label %if.else
+
+if.then: ; preds = %entry
+ %tmp1 = load i32, i32* %A, align 4
+ %add = add nsw i32 %tmp1, 1
+ br label %if.end
+
+if.else: ; preds = %entry
+ %tmp2 = load i32, i32* %A, align 4
+ %sub = add nsw i32 %tmp2, -1
+ br label %if.end
+
+if.end: ; preds = %if.else, %if.then
+ %storemerge = phi i32 [ %sub, %if.else ], [ %add, %if.then ]
+ store i32 %storemerge, i32* %A, align 4
+ ret void
+}
diff --git a/polly/test/Isl/CodeGen/whole-scop-non-affine-subregion.ll b/polly/test/Isl/CodeGen/whole-scop-non-affine-subregion.ll
index 333f620be71..552fc91b57a 100644
--- a/polly/test/Isl/CodeGen/whole-scop-non-affine-subregion.ll
+++ b/polly/test/Isl/CodeGen/whole-scop-non-affine-subregion.ll
@@ -2,9 +2,9 @@
; RUN: -polly-codegen -S < %s | FileCheck %s
; CHECK: polly.start
-
+; int /* pure */ g()
; void f(int *A) {
-; if (*A > 42)
+; if (g())
; *A = *A + 1;
; else
; *A = *A - 1;
@@ -17,22 +17,26 @@ entry:
br label %entry.split
entry.split:
- %tmp = load i32, i32* %A, align 4
- %cmp = icmp sgt i32 %tmp, 42
+ %call = call i32 @g()
+ %cmp = icmp eq i32 %call, 0
br i1 %cmp, label %if.then, label %if.else
if.then: ; preds = %entry
%tmp1 = load i32, i32* %A, align 4
%add = add nsw i32 %tmp1, 1
+ store i32 %add, i32* %A, align 4
br label %if.end
if.else: ; preds = %entry
%tmp2 = load i32, i32* %A, align 4
%sub = add nsw i32 %tmp2, -1
+ store i32 %sub, i32* %A, align 4
br label %if.end
if.end: ; preds = %if.else, %if.then
- %storemerge = phi i32 [ %sub, %if.else ], [ %add, %if.then ]
- store i32 %storemerge, i32* %A, align 4
ret void
}
+
+declare i32 @g() #0
+
+attributes #0 = { nounwind readnone }
diff --git a/polly/test/ScopInfo/intra_and_inter_bb_scalar_dep.ll b/polly/test/ScopInfo/intra_and_inter_bb_scalar_dep.ll
index 11e3b4e4916..f09f8173556 100644
--- a/polly/test/ScopInfo/intra_and_inter_bb_scalar_dep.ll
+++ b/polly/test/ScopInfo/intra_and_inter_bb_scalar_dep.ll
@@ -17,8 +17,8 @@ target datalayout = "e-p:64:64:64-i1:8:8-i8:8:8-i16:16:16-i32:32:32-i64:64:64-f3
; CHECK: Invariant Accesses: {
; CHECK: ReadAccess := [Reduction Type: NONE] [Scalar: 0]
; CHECK: MemRef_init_ptr[0]
-; CHECK: ReadAccess := [Reduction Type: NONE] [Scalar: 0]
-; CHECK: MemRef_init_ptr[0]
+; CHECK-NOT: ReadAccess := [Reduction Type: NONE] [Scalar: 0]
+; CHECK-NOT: MemRef_init_ptr[0]
; CHECK: }
define void @f(i64* noalias %A, i64 %N, i64* noalias %init_ptr) #0 {
entry:
diff --git a/polly/test/ScopInfo/invariant_loads_complicated_dependences.ll b/polly/test/ScopInfo/invariant_loads_complicated_dependences.ll
index dddb185d7ec..ecf6a95f865 100644
--- a/polly/test/ScopInfo/invariant_loads_complicated_dependences.ll
+++ b/polly/test/ScopInfo/invariant_loads_complicated_dependences.ll
@@ -2,17 +2,17 @@
;
; CHECK: Invariant Accesses: {
; CHECK-NEXT: ReadAccess := [Reduction Type: NONE] [Scalar: 0]
-; CHECK-NEXT: [tmp, tmp5] -> { Stmt_for_body[i0] -> MemRef_LB[0] };
-; CHECK-NEXT: Execution Context: [tmp, tmp5] -> { : }
+; CHECK-NEXT: [LB, UB] -> { Stmt_for_body[i0] -> MemRef_LB[0] };
+; CHECK-NEXT: Execution Context: [LB, UB] -> { : }
; CHECK-NEXT: ReadAccess := [Reduction Type: NONE] [Scalar: 0]
-; CHECK-NEXT: [tmp, tmp5] -> { Stmt_do_cond[i0, i1] -> MemRef_UB[0] };
-; CHECK-NEXT: Execution Context: [tmp, tmp5] -> { : }
+; CHECK-NEXT: [LB, UB] -> { Stmt_do_cond[i0, i1] -> MemRef_UB[0] };
+; CHECK-NEXT: Execution Context: [LB, UB] -> { : }
; CHECK-NEXT: ReadAccess := [Reduction Type: NONE] [Scalar: 0]
-; CHECK-NEXT: [tmp, tmp5] -> { Stmt_if_then[i0, i1] -> MemRef_V[0] };
-; CHECK-NEXT: Execution Context: [tmp, tmp5] -> { : (tmp5 >= 1 + tmp and tmp5 >= 6) or tmp >= 6 }
+; CHECK-NEXT: [LB, UB] -> { Stmt_if_then[i0, i1] -> MemRef_V[0] };
+; CHECK-NEXT: Execution Context: [LB, UB] -> { : (UB >= 1 + LB and UB >= 6) or LB >= 6 }
; CHECK-NEXT: ReadAccess := [Reduction Type: NONE] [Scalar: 0]
-; CHECK-NEXT: [tmp, tmp5] -> { Stmt_if_else[i0, i1] -> MemRef_U[0] };
-; CHECK-NEXT: Execution Context: [tmp, tmp5] -> { : tmp <= 5 }
+; CHECK-NEXT: [LB, UB] -> { Stmt_if_else[i0, i1] -> MemRef_U[0] };
+; CHECK-NEXT: Execution Context: [LB, UB] -> { : LB <= 5 }
; CHECK-NEXT: }
;
; void f(int *restrict A, int *restrict V, int *restrict U, int *restrict UB,
diff --git a/polly/test/ScopInfo/invariant_loop_bounds.ll b/polly/test/ScopInfo/invariant_loop_bounds.ll
index 3fc03b98751..18db15a6b0a 100644
--- a/polly/test/ScopInfo/invariant_loop_bounds.ll
+++ b/polly/test/ScopInfo/invariant_loop_bounds.ll
@@ -3,28 +3,28 @@
; CHECK: Invariant Accesses: {
; CHECK-NEXT: ReadAccess := [Reduction Type: NONE] [Scalar: 0]
; CHECK-NEXT: MemRef_bounds[2]
-; CHECK-NEXT: Execution Context: [tmp, tmp8, tmp10] -> { : }
+; CHECK-NEXT: Execution Context: [p_0, p_1, bounds] -> { : }
; CHECK-NEXT: ReadAccess := [Reduction Type: NONE] [Scalar: 0]
; CHECK-NEXT: MemRef_bounds[1]
-; CHECK-NEXT: Execution Context: [tmp, tmp8, tmp10] -> { : tmp >= 1 }
+; CHECK-NEXT: Execution Context: [p_0, p_1, bounds] -> { : p_0 >= 1 }
; CHECK-NEXT: ReadAccess := [Reduction Type: NONE] [Scalar: 0]
; CHECK-NEXT: MemRef_bounds[0]
-; CHECK-NEXT: Execution Context: [tmp, tmp8, tmp10] -> { : tmp8 >= 1 and tmp >= 1 }
+; CHECK-NEXT: Execution Context: [p_0, p_1, bounds] -> { : p_1 >= 1 and p_0 >= 1 }
; CHECK-NEXT: }
;
-; CHECK: p0: %tmp
-; CHECK: p1: %tmp8
-; CHECK: p2: %tmp10
+; CHECK: p0: (8 + @bounds)<nsw>
+; CHECK: p1: (4 + @bounds)<nsw>
+; CHECK: p2: @bounds
; CHECK: Statements {
; CHECK: Stmt_for_body_6
; CHECK: Domain :=
-; CHECK: [tmp, tmp8, tmp10] -> { Stmt_for_body_6[i0, i1, i2] : i0 >= 0 and i0 <= -1 + tmp and i1 >= 0 and i1 <= -1 + tmp8 and i2 >= 0 and i2 <= -1 + tmp10 };
+; CHECK: [p_0, p_1, bounds] -> { Stmt_for_body_6[i0, i1, i2] : i0 >= 0 and i0 <= -1 + p_0 and i1 >= 0 and i1 <= -1 + p_1 and i2 >= 0 and i2 <= -1 + bounds };
; CHECK: Schedule :=
-; CHECK: [tmp, tmp8, tmp10] -> { Stmt_for_body_6[i0, i1, i2] -> [i0, i1, i2] };
+; CHECK: [p_0, p_1, bounds] -> { Stmt_for_body_6[i0, i1, i2] -> [i0, i1, i2] };
; CHECK: ReadAccess := [Reduction Type: NONE] [Scalar: 0]
-; CHECK: [tmp, tmp8, tmp10] -> { Stmt_for_body_6[i0, i1, i2] -> MemRef_data[i0, i1, i2] };
+; CHECK: [p_0, p_1, bounds] -> { Stmt_for_body_6[i0, i1, i2] -> MemRef_data[i0, i1, i2] };
; CHECK: MustWriteAccess := [Reduction Type: NONE] [Scalar: 0]
-; CHECK: [tmp, tmp8, tmp10] -> { Stmt_for_body_6[i0, i1, i2] -> MemRef_data[i0, i1, i2] };
+; CHECK: [p_0, p_1, bounds] -> { Stmt_for_body_6[i0, i1, i2] -> MemRef_data[i0, i1, i2] };
; CHECK: }
;
; int bounds[3];
diff --git a/polly/test/ScopInfo/invariant_same_loop_bound_multiple_times-1.ll b/polly/test/ScopInfo/invariant_same_loop_bound_multiple_times-1.ll
new file mode 100644
index 00000000000..d901575ade3
--- /dev/null
+++ b/polly/test/ScopInfo/invariant_same_loop_bound_multiple_times-1.ll
@@ -0,0 +1,106 @@
+; RUN: opt %loadPolly -polly-scops -analyze < %s | FileCheck %s
+;
+; Verify that we only have one parameter and one invariant load for all
+; three loads that occur in the region but actually access the same
+; location. Also check that the execution context is the most generic
+; one, e.g., here the universal set.
+;
+; CHECK: Invariant Accesses: {
+; CHECK-NEXT: ReadAccess := [Reduction Type: NONE] [Scalar: 0]
+; CHECK-NEXT: MemRef_bounds[0]
+; CHECK-NEXT: Execution Context: [bounds] -> { : }
+; CHECK-NEXT: }
+;
+; CHECK: p0: @bounds
+; CHECK-NOT: p1
+; CHECK: Statements {
+; CHECK: Stmt_for_body_6
+; CHECK: Domain :=
+; CHECK: [bounds] -> { Stmt_for_body_6[i0, i1, i2] : i0 >= 0 and i0 <= -1 + bounds and i1 >= 0 and i1 <= -1 + bounds and i2 >= 0 and i2 <= -1 + bounds };
+; CHECK: Schedule :=
+; CHECK: [bounds] -> { Stmt_for_body_6[i0, i1, i2] -> [i0, i1, i2] };
+; CHECK: ReadAccess := [Reduction Type: NONE] [Scalar: 0]
+; CHECK: [bounds] -> { Stmt_for_body_6[i0, i1, i2] -> MemRef_data[i0, i1, i2] };
+; CHECK: MustWriteAccess := [Reduction Type: NONE] [Scalar: 0]
+; CHECK: [bounds] -> { Stmt_for_body_6[i0, i1, i2] -> MemRef_data[i0, i1, i2] };
+; CHECK: }
+;
+; int bounds[1];
+; double data[1024][1024][1024];
+;
+; void foo() {
+; int i, j, k;
+; for (k = 0; k < bounds[0]; k++)
+; for (j = 0; j < bounds[0]; j++)
+; for (i = 0; i < bounds[0]; i++)
+; data[k][j][i] += i + j + k;
+; }
+;
+target datalayout = "e-m:e-i64:64-f80:128-n8:16:32:64-S128"
+
+@bounds = common global [1 x i32] zeroinitializer, align 4
+@data = common global [1024 x [1024 x [1024 x double]]] zeroinitializer, align 16
+
+define void @foo() {
+entry:
+ br label %for.cond
+
+for.cond: ; preds = %for.inc.16, %entry
+ %indvars.iv5 = phi i64 [ %indvars.iv.next6, %for.inc.16 ], [ 0, %entry ]
+ %tmp = load i32, i32* getelementptr inbounds ([1 x i32], [1 x i32]* @bounds, i64 0, i64 0), align 4
+ %tmp7 = sext i32 %tmp to i64
+ %cmp = icmp slt i64 %indvars.iv5, %tmp7
+ br i1 %cmp, label %for.body, label %for.end.18
+
+for.body: ; preds = %for.cond
+ br label %for.cond.1
+
+for.cond.1: ; preds = %for.inc.13, %for.body
+ %indvars.iv3 = phi i64 [ %indvars.iv.next4, %for.inc.13 ], [ 0, %for.body ]
+ %tmp8 = load i32, i32* getelementptr inbounds ([1 x i32], [1 x i32]* @bounds, i64 0, i64 0), align 4
+ %tmp9 = sext i32 %tmp8 to i64
+ %cmp2 = icmp slt i64 %indvars.iv3, %tmp9
+ br i1 %cmp2, label %for.body.3, label %for.end.15
+
+for.body.3: ; preds = %for.cond.1
+ br label %for.cond.4
+
+for.cond.4: ; preds = %for.inc, %for.body.3
+ %indvars.iv = phi i64 [ %indvars.iv.next, %for.inc ], [ 0, %for.body.3 ]
+ %tmp10 = load i32, i32* getelementptr inbounds ([1 x i32], [1 x i32]* @bounds, i64 0, i64 0), align 4
+ %tmp11 = sext i32 %tmp10 to i64
+ %cmp5 = icmp slt i64 %indvars.iv, %tmp11
+ br i1 %cmp5, label %for.body.6, label %for.end
+
+for.body.6: ; preds = %for.cond.4
+ %tmp12 = add nsw i64 %indvars.iv, %indvars.iv3
+ %tmp13 = add nsw i64 %tmp12, %indvars.iv5
+ %tmp14 = trunc i64 %tmp13 to i32
+ %conv = sitofp i32 %tmp14 to double
+ %arrayidx11 = getelementptr inbounds [1024 x [1024 x [1024 x double]]], [1024 x [1024 x [1024 x double]]]* @data, i64 0, i64 %indvars.iv5, i64 %indvars.iv3, i64 %indvars.iv
+ %tmp15 = load double, double* %arrayidx11, align 8
+ %add12 = fadd double %tmp15, %conv
+ store double %add12, double* %arrayidx11, align 8
+ br label %for.inc
+
+for.inc: ; preds = %for.body.6
+ %indvars.iv.next = add nuw nsw i64 %indvars.iv, 1
+ br label %for.cond.4
+
+for.end: ; preds = %for.cond.4
+ br label %for.inc.13
+
+for.inc.13: ; preds = %for.end
+ %indvars.iv.next4 = add nuw nsw i64 %indvars.iv3, 1
+ br label %for.cond.1
+
+for.end.15: ; preds = %for.cond.1
+ br label %for.inc.16
+
+for.inc.16: ; preds = %for.end.15
+ %indvars.iv.next6 = add nuw nsw i64 %indvars.iv5, 1
+ br label %for.cond
+
+for.end.18: ; preds = %for.cond
+ ret void
+}
diff --git a/polly/test/ScopInfo/invariant_same_loop_bound_multiple_times-2.ll b/polly/test/ScopInfo/invariant_same_loop_bound_multiple_times-2.ll
new file mode 100644
index 00000000000..27b0e343109
--- /dev/null
+++ b/polly/test/ScopInfo/invariant_same_loop_bound_multiple_times-2.ll
@@ -0,0 +1,109 @@
+; RUN: opt %loadPolly -polly-scops -analyze < %s | FileCheck %s
+;
+; Verify that we only have one parameter and one invariant load for all
+; three loads that occur in the region but actually access the same
+; location. Also check that the execution context is the most generic
+; one, e.g., here the universal set.
+;
+; CHECK: Invariant Accesses: {
+; CHECK-NEXT: ReadAccess := [Reduction Type: NONE] [Scalar: 0]
+; CHECK-NEXT: MemRef_bounds[0]
+; CHECK-NEXT: Execution Context: [bounds, p] -> { : }
+; CHECK-NEXT: }
+;
+; CHECK: p0: @bounds
+; CHECK: p1: %p
+; CHECK-NOT: p2:
+; CHECK: Statements {
+; CHECK: Stmt_for_body_6
+; CHECK: Domain :=
+; CHECK: [bounds, p] -> { Stmt_for_body_6[i0, i1, i2] : p = 0 and i0 >= 0 and i0 <= -1 + bounds and i1 >= 0 and i1 <= -1 + bounds and i2 >= 0 and i2 <= -1 + bounds };
+; CHECK: Schedule :=
+; CHECK: [bounds, p] -> { Stmt_for_body_6[i0, i1, i2] -> [i0, i1, i2] };
+; CHECK: ReadAccess := [Reduction Type: NONE] [Scalar: 0]
+; CHECK: [bounds, p] -> { Stmt_for_body_6[i0, i1, i2] -> MemRef_data[i0, i1, i2] };
+; CHECK: MustWriteAccess := [Reduction Type: NONE] [Scalar: 0]
+; CHECK: [bounds, p] -> { Stmt_for_body_6[i0, i1, i2] -> MemRef_data[i0, i1, i2] };
+; CHECK: }
+;
+; int bounds[1];
+; double data[1024][1024][1024];
+;
+; void foo(int p) {
+; int i, j, k;
+; for (k = 0; k < bounds[0]; k++)
+; if (p == 0)
+; for (j = 0; j < bounds[0]; j++)
+; for (i = 0; i < bounds[0]; i++)
+; data[k][j][i] += i + j + k;
+; }
+;
+target datalayout = "e-m:e-i64:64-f80:128-n8:16:32:64-S128"
+
+@bounds = common global [1 x i32] zeroinitializer, align 4
+@data = common global [1024 x [1024 x [1024 x double]]] zeroinitializer, align 16
+
+define void @foo(i32 %p) {
+entry:
+ br label %for.cond
+
+for.cond: ; preds = %for.inc.16, %entry
+ %indvars.iv5 = phi i64 [ %indvars.iv.next6, %for.inc.16 ], [ 0, %entry ]
+ %tmp = load i32, i32* getelementptr inbounds ([1 x i32], [1 x i32]* @bounds, i64 0, i64 0), align 4
+ %tmp7 = sext i32 %tmp to i64
+ %cmp = icmp slt i64 %indvars.iv5, %tmp7
+ br i1 %cmp, label %for.body, label %for.end.18
+
+for.body: ; preds = %for.cond
+ %cmpp = icmp eq i32 %p, 0
+ br i1 %cmpp, label %for.cond.1, label %for.inc.16
+
+for.cond.1: ; preds = %for.inc.13, %for.body
+ %indvars.iv3 = phi i64 [ %indvars.iv.next4, %for.inc.13 ], [ 0, %for.body ]
+ %tmp8 = load i32, i32* getelementptr inbounds ([1 x i32], [1 x i32]* @bounds, i64 0, i64 0), align 4
+ %tmp9 = sext i32 %tmp8 to i64
+ %cmp2 = icmp slt i64 %indvars.iv3, %tmp9
+ br i1 %cmp2, label %for.body.3, label %for.end.15
+
+for.body.3: ; preds = %for.cond.1
+ br label %for.cond.4
+
+for.cond.4: ; preds = %for.inc, %for.body.3
+ %indvars.iv = phi i64 [ %indvars.iv.next, %for.inc ], [ 0, %for.body.3 ]
+ %tmp10 = load i32, i32* getelementptr inbounds ([1 x i32], [1 x i32]* @bounds, i64 0, i64 0), align 4
+ %tmp11 = sext i32 %tmp10 to i64
+ %cmp5 = icmp slt i64 %indvars.iv, %tmp11
+ br i1 %cmp5, label %for.body.6, label %for.end
+
+for.body.6: ; preds = %for.cond.4
+ %tmp12 = add nsw i64 %indvars.iv, %indvars.iv3
+ %tmp13 = add nsw i64 %tmp12, %indvars.iv5
+ %tmp14 = trunc i64 %tmp13 to i32
+ %conv = sitofp i32 %tmp14 to double
+ %arrayidx11 = getelementptr inbounds [1024 x [1024 x [1024 x double]]], [1024 x [1024 x [1024 x double]]]* @data, i64 0, i64 %indvars.iv5, i64 %indvars.iv3, i64 %indvars.iv
+ %tmp15 = load double, double* %arrayidx11, align 8
+ %add12 = fadd double %tmp15, %conv
+ store double %add12, double* %arrayidx11, align 8
+ br label %for.inc
+
+for.inc: ; preds = %for.body.6
+ %indvars.iv.next = add nuw nsw i64 %indvars.iv, 1
+ br label %for.cond.4
+
+for.end: ; preds = %for.cond.4
+ br label %for.inc.13
+
+for.inc.13: ; preds = %for.end
+ %indvars.iv.next4 = add nuw nsw i64 %indvars.iv3, 1
+ br label %for.cond.1
+
+for.end.15: ; preds = %for.cond.1
+ br label %for.inc.16
+
+for.inc.16: ; preds = %for.end.15
+ %indvars.iv.next6 = add nuw nsw i64 %indvars.iv5, 1
+ br label %for.cond
+
+for.end.18: ; preds = %for.cond
+ ret void
+}
diff --git a/polly/test/ScopInfo/required-invariant-loop-bounds.ll b/polly/test/ScopInfo/required-invariant-loop-bounds.ll
index 25bf1b630ee..06d15e53884 100644
--- a/polly/test/ScopInfo/required-invariant-loop-bounds.ll
+++ b/polly/test/ScopInfo/required-invariant-loop-bounds.ll
@@ -3,10 +3,10 @@
; CHECK: Invariant Accesses: {
; CHECK-NEXT: ReadAccess := [Reduction Type: NONE] [Scalar: 0]
; CHECK-NEXT: MemRef_bounds[0]
-; CHECK-NEXT: Execution Context: [tmp, tmp1] -> { : }
+; CHECK-NEXT: Execution Context: [bounds, p_1] -> { : }
; CHECK-NEXT: ReadAccess := [Reduction Type: NONE] [Scalar: 0]
; CHECK-NEXT: MemRef_bounds[1]
-; CHECK-NEXT: Execution Context: [tmp, tmp1] -> { : tmp >= 0 }
+; CHECK-NEXT: Execution Context: [bounds, p_1] -> { : bounds >= 0 }
; CHECK: }
; double A[1000][1000];
OpenPOWER on IntegriCloud