diff options
author | Johannes Doerfert <doerfert@cs.uni-saarland.de> | 2015-10-09 17:12:26 +0000 |
---|---|---|
committer | Johannes Doerfert <doerfert@cs.uni-saarland.de> | 2015-10-09 17:12:26 +0000 |
commit | 697fdf891c50f50a504574dfc46a5a832b10ccc6 (patch) | |
tree | 6d2c7b163ebf9a3f4f0a37cdfd364a8a39a2a0fe /polly | |
parent | 769e1a972d3fc59b74906baf4de1a518b81d1e02 (diff) | |
download | bcm5719-llvm-697fdf891c50f50a504574dfc46a5a832b10ccc6.tar.gz bcm5719-llvm-697fdf891c50f50a504574dfc46a5a832b10ccc6.zip |
Consolidate invariant loads
If an (assumed) invariant location is loaded multiple times we
generated a parameter for each location. However, this caused compile
time problems for several benchmarks (e.g., 445_gobmk in SPEC2006 and
BT in the NAS benchmarks). Additionally, the code we generate is
suboptimal as we preload the same location multiple times and perform
the same checks on all the parameters that refer to the same value.
With this patch we consolidate the invariant loads in three steps:
1) During SCoP initialization required invariant loads are put in
equivalence classes based on their pointer operand. One
representing load is used to generate a parameter for the whole
class, thus we never generate multiple parameters for the same
location.
2) During the SCoP simplification we remove invariant memory
accesses that are in the same equivalence class. While doing so
we build the union of all execution domains as it is only
important that the location is at least accessed once.
3) During code generation we only preload one element of each
equivalence class with the unified execution domain. All others
are mapped to that preloaded value.
Differential Revision: http://reviews.llvm.org/D13338
llvm-svn: 249853
Diffstat (limited to 'polly')
-rw-r--r-- | polly/include/polly/ScopInfo.h | 43 | ||||
-rw-r--r-- | polly/lib/Analysis/ScopInfo.cpp | 125 | ||||
-rw-r--r-- | polly/lib/CodeGen/IslNodeBuilder.cpp | 18 | ||||
-rw-r--r-- | polly/test/Isl/CodeGen/OpenMP/invariant_base_pointers_preloaded.ll | 35 | ||||
-rw-r--r-- | polly/test/Isl/CodeGen/invariant_load_outermost.ll | 37 | ||||
-rw-r--r-- | polly/test/Isl/CodeGen/whole-scop-non-affine-subregion.ll | 16 | ||||
-rw-r--r-- | polly/test/ScopInfo/intra_and_inter_bb_scalar_dep.ll | 4 | ||||
-rw-r--r-- | polly/test/ScopInfo/invariant_loads_complicated_dependences.ll | 16 | ||||
-rw-r--r-- | polly/test/ScopInfo/invariant_loop_bounds.ll | 20 | ||||
-rw-r--r-- | polly/test/ScopInfo/invariant_same_loop_bound_multiple_times-1.ll | 106 | ||||
-rw-r--r-- | polly/test/ScopInfo/invariant_same_loop_bound_multiple_times-2.ll | 109 | ||||
-rw-r--r-- | polly/test/ScopInfo/required-invariant-loop-bounds.ll | 4 |
12 files changed, 480 insertions, 53 deletions
diff --git a/polly/include/polly/ScopInfo.h b/polly/include/polly/ScopInfo.h index 43371d5d20e..f8cdc536acb 100644 --- a/polly/include/polly/ScopInfo.h +++ b/polly/include/polly/ScopInfo.h @@ -680,8 +680,14 @@ using MemoryAccessList = std::forward_list<MemoryAccess *>; /// @brief Type for invariant memory accesses and their domain context. using InvariantAccessTy = std::pair<MemoryAccess *, isl_set *>; +/// @brief Type for an ordered list of invariant accesses. +using InvariantAccessListTy = std::forward_list<InvariantAccessTy>; + +/// @brief Type for a class of equivalent invariant memory accesses. +using InvariantEquivClassTy = std::pair<const SCEV *, InvariantAccessListTy>; + /// @brief Type for multiple invariant memory accesses and their domain context. -using InvariantAccessesTy = SmallVector<InvariantAccessTy, 8>; +using InvariantAccessesTy = SmallVector<InvariantEquivClassTy, 8>; ///===----------------------------------------------------------------------===// /// @brief Statement of the Scop @@ -906,12 +912,12 @@ public: /// @brief Add @p Access to this statement's list of accesses. void addAccess(MemoryAccess *Access); - /// @brief Move the memory access in @p InvMAs to @p TargetList. + /// @brief Move the memory access in @p InvMAs to @p InvariantEquivClasses. /// /// Note that scalar accesses that are caused by any access in @p InvMAs will /// be eliminated too. void hoistMemoryAccesses(MemoryAccessList &InvMAs, - InvariantAccessesTy &TargetList); + InvariantAccessesTy &InvariantEquivClasses); typedef MemoryAccessVec::iterator iterator; typedef MemoryAccessVec::const_iterator const_iterator; @@ -1135,7 +1141,7 @@ private: MinMaxVectorPairVectorTy MinMaxAliasGroups; /// @brief List of invariant accesses. - InvariantAccessesTy InvariantAccesses; + InvariantAccessesTy InvariantEquivClasses; /// @brief Scop constructor; invoked from ScopInfo::buildScop. 
Scop(Region &R, AccFuncMapType &AccFuncMap, ScopDetection &SD, @@ -1186,6 +1192,20 @@ private: /// @see isIgnored() void simplifySCoP(bool RemoveIgnoredStmts); + /// @brief Create equivalence classes for required invariant accesses. + /// + /// These classes will consolidate multiple required invariant loads from the + /// same address in order to keep the number of dimensions in the SCoP + /// description small. For each such class equivalence class only one + /// representing element, hence one required invariant load, will be chosen + /// and modeled as parameter. The method + /// Scop::getRepresentingInvariantLoadSCEV() will replace each element from an + /// equivalence class with the representing element that is modeled. As a + /// consequence Scop::getIdForParam() will only return an id for the + /// representing element of each equivalence class, thus for each required + /// invariant location. + void buildInvariantEquivalenceClasses(); + /// @brief Hoist invariant memory loads and check for required ones. /// /// We first identify "common" invariant loads, thus loads that are invariant @@ -1220,6 +1240,19 @@ private: /// @brief Simplify the assumed and boundary context. void simplifyContexts(); + /// @brief Get the representing SCEV for @p S if applicable, otherwise @p S. + /// + /// Invariant loads of the same location are put in an equivalence class and + /// only one of them is chosen as a representing element that will be + /// modeled as a parameter. The others have to be normalized, i.e., + /// replaced by the representing element of their equivalence class, in order + /// to get the correct parameter value, e.g., in the SCEVAffinator. + /// + /// @param S The SCEV to normalize. + /// + /// @return The representing SCEV for invariant loads or @p S if none. + const SCEV *getRepresentingInvariantLoadSCEV(const SCEV *S) const; + /// @brief Create a new SCoP statement for either @p BB or @p R. /// /// Either @p BB or @p R should be non-null. 
A new statement for the non-null @@ -1340,7 +1373,7 @@ public: /// @brief Return the set of invariant accesses. const InvariantAccessesTy &getInvariantAccesses() const { - return InvariantAccesses; + return InvariantEquivClasses; } /// @brief Mark the SCoP as optimized by the scheduler. diff --git a/polly/lib/Analysis/ScopInfo.cpp b/polly/lib/Analysis/ScopInfo.cpp index 0a6394d1256..34f5114feaa 100644 --- a/polly/lib/Analysis/ScopInfo.cpp +++ b/polly/lib/Analysis/ScopInfo.cpp @@ -1356,7 +1356,7 @@ void ScopStmt::print(raw_ostream &OS) const { void ScopStmt::dump() const { print(dbgs()); } void ScopStmt::hoistMemoryAccesses(MemoryAccessList &InvMAs, - InvariantAccessesTy &TargetList) { + InvariantAccessesTy &InvariantEquivClasses) { // Remove all memory accesses in @p InvMAs from this statement together // with all scalar accesses that were caused by them. The tricky iteration @@ -1409,8 +1409,49 @@ void ScopStmt::hoistMemoryAccesses(MemoryAccessList &InvMAs, } } - for (MemoryAccess *MA : InvMAs) - TargetList.push_back(std::make_pair(MA, isl_set_copy(DomainCtx))); + for (MemoryAccess *MA : InvMAs) { + + // Check for another invariant access that accesses the same location as + // MA and if found consolidate them. Otherwise create a new equivalence + // class at the end of InvariantEquivClasses. + LoadInst *LInst = cast<LoadInst>(MA->getAccessInstruction()); + const SCEV *PointerSCEV = SE.getSCEV(LInst->getPointerOperand()); + bool Consolidated = false; + + for (auto &IAClass : InvariantEquivClasses) { + const SCEV *ClassPointerSCEV = IAClass.first; + if (PointerSCEV != ClassPointerSCEV) + continue; + + Consolidated = true; + + // We created empty equivalence classes for required invariant loads + // in the beginning and might encounter one of them here. If so, this + // MA will be the first in that equivalence class. 
+ auto &ClassList = IAClass.second; + if (ClassList.empty()) { + ClassList.push_front(std::make_pair(MA, isl_set_copy(DomainCtx))); + break; + } + + // If the equivalence class for MA is not empty we unify the execution + // context and add MA to the list of accesses that are in this class. + isl_set *IAClassDomainCtx = IAClass.second.front().second; + IAClassDomainCtx = + isl_set_union(IAClassDomainCtx, isl_set_copy(DomainCtx)); + ClassList.push_front(std::make_pair(MA, IAClassDomainCtx)); + break; + } + + if (Consolidated) + continue; + + // If we did not consolidate MA, thus did not find an equivalence class + // that for it, we create a new one. + InvariantAccessTy IA = std::make_pair(MA, isl_set_copy(DomainCtx)); + InvariantEquivClasses.emplace_back(InvariantEquivClassTy( + std::make_pair(PointerSCEV, InvariantAccessListTy({IA})))); + } isl_set_free(DomainCtx); } @@ -1424,9 +1465,34 @@ void Scop::setContext(__isl_take isl_set *NewContext) { Context = NewContext; } +const SCEV *Scop::getRepresentingInvariantLoadSCEV(const SCEV *S) const { + const SCEVUnknown *SU = dyn_cast_or_null<SCEVUnknown>(S); + if (!SU) + return S; + + LoadInst *LInst = dyn_cast<LoadInst>(SU->getValue()); + if (!LInst) + return S; + + // Try to find an equivalence class for the load, if found return + // the SCEV for the representing element, otherwise return S. + const SCEV *PointerSCEV = SE->getSCEV(LInst->getPointerOperand()); + for (const InvariantEquivClassTy &IAClass : InvariantEquivClasses) { + const SCEV *ClassPointerSCEV = IAClass.first; + if (ClassPointerSCEV == PointerSCEV) + return ClassPointerSCEV; + } + + return S; +} + void Scop::addParams(std::vector<const SCEV *> NewParameters) { for (const SCEV *Parameter : NewParameters) { Parameter = extractConstantFactor(Parameter, *SE).second; + + // Normalize the SCEV to get the representing element for an invariant load. 
+ Parameter = getRepresentingInvariantLoadSCEV(Parameter); + if (ParameterIds.find(Parameter) != ParameterIds.end()) continue; @@ -1438,6 +1504,9 @@ void Scop::addParams(std::vector<const SCEV *> NewParameters) { } __isl_give isl_id *Scop::getIdForParam(const SCEV *Parameter) const { + // Normalize the SCEV to get the representing element for an invariant load. + Parameter = getRepresentingInvariantLoadSCEV(Parameter); + ParamIdType::const_iterator IdIter = ParameterIds.find(Parameter); if (IdIter == ParameterIds.end()) @@ -1515,6 +1584,21 @@ void Scop::addUserContext() { isl_space_free(Space); } +void Scop::buildInvariantEquivalenceClasses() { + const InvariantLoadsSetTy &RIL = *SD.getRequiredInvariantLoads(&getRegion()); + SmallPtrSet<const SCEV *, 4> ClassPointerSet; + for (LoadInst *LInst : RIL) { + const SCEV *PointerSCEV = SE->getSCEV(LInst->getPointerOperand()); + + // Skip the load if we already have a equivalence class for the pointer. + if (!ClassPointerSet.insert(PointerSCEV).second) + continue; + + InvariantEquivClasses.emplace_back(InvariantEquivClassTy( + std::make_pair(PointerSCEV, InvariantAccessListTy()))); + } +} + void Scop::buildContext() { isl_space *Space = isl_space_params_alloc(IslCtx, 0); Context = isl_set_universe(isl_space_copy(Space)); @@ -2337,6 +2421,8 @@ Scop::Scop(Region &R, AccFuncMapType &AccFuncMap, ScopDetection &SD, void Scop::init(AliasAnalysis &AA) { buildContext(); + buildInvariantEquivalenceClasses(); + buildDomains(&R); // Remove empty and ignored statements. @@ -2388,8 +2474,9 @@ Scop::~Scop() { } } - for (const auto &IA : InvariantAccesses) - isl_set_free(IA.second); + for (const auto &IAClass : InvariantEquivClasses) + if (!IAClass.second.empty()) + isl_set_free(IAClass.second.front().second); } void Scop::updateAccessDimensionality() { @@ -2478,17 +2565,18 @@ void Scop::hoistInvariantLoads() { InvMAs.reverse(); // Transfer the memory access from the statement to the SCoP. 
- Stmt.hoistMemoryAccesses(InvMAs, InvariantAccesses); + Stmt.hoistMemoryAccesses(InvMAs, InvariantEquivClasses); isl_set_free(Domain); } isl_union_map_free(Writes); - if (!InvariantAccesses.empty()) + if (!InvariantEquivClasses.empty()) IsOptimized = true; + auto &ScopRIL = *SD.getRequiredInvariantLoads(&getRegion()); // Check required invariant loads that were tagged during SCoP detection. - for (LoadInst *LI : *SD.getRequiredInvariantLoads(&getRegion())) { + for (LoadInst *LI : ScopRIL) { assert(LI && getRegion().contains(LI)); ScopStmt *Stmt = getStmtForBasicBlock(LI->getParent()); if (Stmt && Stmt->lookupAccessesFor(LI) != nullptr) { @@ -2511,8 +2599,12 @@ void Scop::hoistInvariantLoads() { // we already ordered the accesses such that indirect loads can be resolved, // thus we use a stable sort here. - auto compareInvariantAccesses = [this](const InvariantAccessTy &IA0, - const InvariantAccessTy &IA1) { + auto compareInvariantAccesses = [this]( + const InvariantEquivClassTy &IAClass0, + const InvariantEquivClassTy &IAClass1) { + const InvariantAccessTy &IA0 = IAClass0.second.front(); + const InvariantAccessTy &IA1 = IAClass1.second.front(); + Instruction *AI0 = IA0.first->getAccessInstruction(); Instruction *AI1 = IA1.first->getAccessInstruction(); @@ -2554,7 +2646,7 @@ void Scop::hoistInvariantLoads() { return Involves1Id0; }; - std::stable_sort(InvariantAccesses.begin(), InvariantAccesses.end(), + std::stable_sort(InvariantEquivClasses.begin(), InvariantEquivClasses.end(), compareInvariantAccesses); } @@ -2739,9 +2831,14 @@ void Scop::print(raw_ostream &OS) const { OS.indent(4) << "Region: " << getNameStr() << "\n"; OS.indent(4) << "Max Loop Depth: " << getMaxLoopDepth() << "\n"; OS.indent(4) << "Invariant Accesses: {\n"; - for (const auto &IA : InvariantAccesses) { - IA.first->print(OS); - OS.indent(12) << "Execution Context: " << IA.second << "\n"; + for (const auto &IAClass : InvariantEquivClasses) { + if (IAClass.second.empty()) { + OS.indent(12) << 
"Class Pointer: " << IAClass.first << "\n"; + } else { + IAClass.second.front().first->print(OS); + OS.indent(12) << "Execution Context: " << IAClass.second.front().second + << "\n"; + } } OS.indent(4) << "}\n"; printContext(OS.indent(4)); diff --git a/polly/lib/CodeGen/IslNodeBuilder.cpp b/polly/lib/CodeGen/IslNodeBuilder.cpp index f5ee8c9abfc..086ff7e91d8 100644 --- a/polly/lib/CodeGen/IslNodeBuilder.cpp +++ b/polly/lib/CodeGen/IslNodeBuilder.cpp @@ -906,8 +906,8 @@ Value *IslNodeBuilder::preloadInvariantLoad(const MemoryAccess &MA, void IslNodeBuilder::preloadInvariantLoads() { - const auto &InvAccList = S.getInvariantAccesses(); - if (InvAccList.empty()) + const auto &InvariantEquivClasses = S.getInvariantAccesses(); + if (InvariantEquivClasses.empty()) return; const Region &R = S.getRegion(); @@ -921,14 +921,20 @@ void IslNodeBuilder::preloadInvariantLoads() { isl_ast_build *Build = isl_ast_build_from_context(isl_set_universe(S.getParamSpace())); - for (const auto &IA : InvAccList) { - MemoryAccess *MA = IA.first; + // For each equivalence class of invariant loads we pre-load the representing + // element with the unified execution context. However, we have to map all + // elements of the class to the one preloaded load as they are referenced + // during the code generation and therefor need to be mapped. 
+ for (const auto &IAClass : InvariantEquivClasses) { + + MemoryAccess *MA = IAClass.second.front().first; assert(!MA->isImplicit()); - isl_set *Domain = isl_set_copy(IA.second); + isl_set *Domain = isl_set_copy(IAClass.second.front().second); Instruction *AccInst = MA->getAccessInstruction(); Value *PreloadVal = preloadInvariantLoad(*MA, Domain, Build); - ValueMap[AccInst] = PreloadVal; + for (const InvariantAccessTy &IA : IAClass.second) + ValueMap[IA.first->getAccessInstruction()] = PreloadVal; if (SE.isSCEVable(AccInst->getType())) { isl_id *ParamId = S.getIdForParam(SE.getSCEV(AccInst)); diff --git a/polly/test/Isl/CodeGen/OpenMP/invariant_base_pointers_preloaded.ll b/polly/test/Isl/CodeGen/OpenMP/invariant_base_pointers_preloaded.ll new file mode 100644 index 00000000000..cbebd60ce43 --- /dev/null +++ b/polly/test/Isl/CodeGen/OpenMP/invariant_base_pointers_preloaded.ll @@ -0,0 +1,35 @@ +; RUN: opt %loadPolly -polly-codegen -polly-parallel \ +; RUN: -polly-parallel-force -S < %s | FileCheck %s +; +; Test to verify that we hand down the preloaded A[0] to the OpenMP subfunction. 
+; +; void f(float *A) { +; for (int i = 1; i < 1000; i++) +; A[i] += A[0] + A[0]; +; } +; +; CHECK: %polly.subfn.storeaddr.polly.access.A.load = getelementptr inbounds +; CHECK: store float %polly.access.A.load, float* %polly.subfn.storeaddr.polly.access.A.load +; +target datalayout = "e-m:e-i64:64-f80:128-n8:16:32:64-S128" + +define void @f(float* nocapture %A) { +entry: + br label %for.body + +for.cond.cleanup: ; preds = %for.body + ret void + +for.body: ; preds = %for.body, %entry + %indvars.iv = phi i64 [ 1, %entry ], [ %indvars.iv.next, %for.body ] + %tmp = load float, float* %A, align 4 + %tmp2 = load float, float* %A, align 4 + %tmpadd = fadd float %tmp, %tmp2 + %arrayidx1 = getelementptr inbounds float, float* %A, i64 %indvars.iv + %tmp1 = load float, float* %arrayidx1, align 4 + %add = fadd float %tmp2, %tmp1 + store float %add, float* %arrayidx1, align 4 + %indvars.iv.next = add nuw nsw i64 %indvars.iv, 1 + %exitcond = icmp eq i64 %indvars.iv.next, 1000 + br i1 %exitcond, label %for.cond.cleanup, label %for.body +} diff --git a/polly/test/Isl/CodeGen/invariant_load_outermost.ll b/polly/test/Isl/CodeGen/invariant_load_outermost.ll new file mode 100644 index 00000000000..49623fd7824 --- /dev/null +++ b/polly/test/Isl/CodeGen/invariant_load_outermost.ll @@ -0,0 +1,37 @@ +; RUN: opt %loadPolly -polly-codegen -S < %s | FileCheck %s + +; CHECK: polly.start + +; void f(int *A) { +; if (*A > 42) +; *A = *A + 1; +; else +; *A = *A - 1; +; } +; +target datalayout = "e-m:e-i64:64-f80:128-n8:16:32:64-S128" + +define void @f(i32* %A) { +entry: + br label %entry.split + +entry.split: + %tmp = load i32, i32* %A, align 4 + %cmp = icmp sgt i32 %tmp, 42 + br i1 %cmp, label %if.then, label %if.else + +if.then: ; preds = %entry + %tmp1 = load i32, i32* %A, align 4 + %add = add nsw i32 %tmp1, 1 + br label %if.end + +if.else: ; preds = %entry + %tmp2 = load i32, i32* %A, align 4 + %sub = add nsw i32 %tmp2, -1 + br label %if.end + +if.end: ; preds = %if.else, %if.then + 
%storemerge = phi i32 [ %sub, %if.else ], [ %add, %if.then ] + store i32 %storemerge, i32* %A, align 4 + ret void +} diff --git a/polly/test/Isl/CodeGen/whole-scop-non-affine-subregion.ll b/polly/test/Isl/CodeGen/whole-scop-non-affine-subregion.ll index 333f620be71..552fc91b57a 100644 --- a/polly/test/Isl/CodeGen/whole-scop-non-affine-subregion.ll +++ b/polly/test/Isl/CodeGen/whole-scop-non-affine-subregion.ll @@ -2,9 +2,9 @@ ; RUN: -polly-codegen -S < %s | FileCheck %s ; CHECK: polly.start - +; int /* pure */ g() ; void f(int *A) { -; if (*A > 42) +; if (g()) ; *A = *A + 1; ; else ; *A = *A - 1; @@ -17,22 +17,26 @@ entry: br label %entry.split entry.split: - %tmp = load i32, i32* %A, align 4 - %cmp = icmp sgt i32 %tmp, 42 + %call = call i32 @g() + %cmp = icmp eq i32 %call, 0 br i1 %cmp, label %if.then, label %if.else if.then: ; preds = %entry %tmp1 = load i32, i32* %A, align 4 %add = add nsw i32 %tmp1, 1 + store i32 %add, i32* %A, align 4 br label %if.end if.else: ; preds = %entry %tmp2 = load i32, i32* %A, align 4 %sub = add nsw i32 %tmp2, -1 + store i32 %sub, i32* %A, align 4 br label %if.end if.end: ; preds = %if.else, %if.then - %storemerge = phi i32 [ %sub, %if.else ], [ %add, %if.then ] - store i32 %storemerge, i32* %A, align 4 ret void } + +declare i32 @g() #0 + +attributes #0 = { nounwind readnone } diff --git a/polly/test/ScopInfo/intra_and_inter_bb_scalar_dep.ll b/polly/test/ScopInfo/intra_and_inter_bb_scalar_dep.ll index 11e3b4e4916..f09f8173556 100644 --- a/polly/test/ScopInfo/intra_and_inter_bb_scalar_dep.ll +++ b/polly/test/ScopInfo/intra_and_inter_bb_scalar_dep.ll @@ -17,8 +17,8 @@ target datalayout = "e-p:64:64:64-i1:8:8-i8:8:8-i16:16:16-i32:32:32-i64:64:64-f3 ; CHECK: Invariant Accesses: { ; CHECK: ReadAccess := [Reduction Type: NONE] [Scalar: 0] ; CHECK: MemRef_init_ptr[0] -; CHECK: ReadAccess := [Reduction Type: NONE] [Scalar: 0] -; CHECK: MemRef_init_ptr[0] +; CHECK-NOT: ReadAccess := [Reduction Type: NONE] [Scalar: 0] +; CHECK-NOT: 
MemRef_init_ptr[0] ; CHECK: } define void @f(i64* noalias %A, i64 %N, i64* noalias %init_ptr) #0 { entry: diff --git a/polly/test/ScopInfo/invariant_loads_complicated_dependences.ll b/polly/test/ScopInfo/invariant_loads_complicated_dependences.ll index dddb185d7ec..ecf6a95f865 100644 --- a/polly/test/ScopInfo/invariant_loads_complicated_dependences.ll +++ b/polly/test/ScopInfo/invariant_loads_complicated_dependences.ll @@ -2,17 +2,17 @@ ; ; CHECK: Invariant Accesses: { ; CHECK-NEXT: ReadAccess := [Reduction Type: NONE] [Scalar: 0] -; CHECK-NEXT: [tmp, tmp5] -> { Stmt_for_body[i0] -> MemRef_LB[0] }; -; CHECK-NEXT: Execution Context: [tmp, tmp5] -> { : } +; CHECK-NEXT: [LB, UB] -> { Stmt_for_body[i0] -> MemRef_LB[0] }; +; CHECK-NEXT: Execution Context: [LB, UB] -> { : } ; CHECK-NEXT: ReadAccess := [Reduction Type: NONE] [Scalar: 0] -; CHECK-NEXT: [tmp, tmp5] -> { Stmt_do_cond[i0, i1] -> MemRef_UB[0] }; -; CHECK-NEXT: Execution Context: [tmp, tmp5] -> { : } +; CHECK-NEXT: [LB, UB] -> { Stmt_do_cond[i0, i1] -> MemRef_UB[0] }; +; CHECK-NEXT: Execution Context: [LB, UB] -> { : } ; CHECK-NEXT: ReadAccess := [Reduction Type: NONE] [Scalar: 0] -; CHECK-NEXT: [tmp, tmp5] -> { Stmt_if_then[i0, i1] -> MemRef_V[0] }; -; CHECK-NEXT: Execution Context: [tmp, tmp5] -> { : (tmp5 >= 1 + tmp and tmp5 >= 6) or tmp >= 6 } +; CHECK-NEXT: [LB, UB] -> { Stmt_if_then[i0, i1] -> MemRef_V[0] }; +; CHECK-NEXT: Execution Context: [LB, UB] -> { : (UB >= 1 + LB and UB >= 6) or LB >= 6 } ; CHECK-NEXT: ReadAccess := [Reduction Type: NONE] [Scalar: 0] -; CHECK-NEXT: [tmp, tmp5] -> { Stmt_if_else[i0, i1] -> MemRef_U[0] }; -; CHECK-NEXT: Execution Context: [tmp, tmp5] -> { : tmp <= 5 } +; CHECK-NEXT: [LB, UB] -> { Stmt_if_else[i0, i1] -> MemRef_U[0] }; +; CHECK-NEXT: Execution Context: [LB, UB] -> { : LB <= 5 } ; CHECK-NEXT: } ; ; void f(int *restrict A, int *restrict V, int *restrict U, int *restrict UB, diff --git a/polly/test/ScopInfo/invariant_loop_bounds.ll 
b/polly/test/ScopInfo/invariant_loop_bounds.ll index 3fc03b98751..18db15a6b0a 100644 --- a/polly/test/ScopInfo/invariant_loop_bounds.ll +++ b/polly/test/ScopInfo/invariant_loop_bounds.ll @@ -3,28 +3,28 @@ ; CHECK: Invariant Accesses: { ; CHECK-NEXT: ReadAccess := [Reduction Type: NONE] [Scalar: 0] ; CHECK-NEXT: MemRef_bounds[2] -; CHECK-NEXT: Execution Context: [tmp, tmp8, tmp10] -> { : } +; CHECK-NEXT: Execution Context: [p_0, p_1, bounds] -> { : } ; CHECK-NEXT: ReadAccess := [Reduction Type: NONE] [Scalar: 0] ; CHECK-NEXT: MemRef_bounds[1] -; CHECK-NEXT: Execution Context: [tmp, tmp8, tmp10] -> { : tmp >= 1 } +; CHECK-NEXT: Execution Context: [p_0, p_1, bounds] -> { : p_0 >= 1 } ; CHECK-NEXT: ReadAccess := [Reduction Type: NONE] [Scalar: 0] ; CHECK-NEXT: MemRef_bounds[0] -; CHECK-NEXT: Execution Context: [tmp, tmp8, tmp10] -> { : tmp8 >= 1 and tmp >= 1 } +; CHECK-NEXT: Execution Context: [p_0, p_1, bounds] -> { : p_1 >= 1 and p_0 >= 1 } ; CHECK-NEXT: } ; -; CHECK: p0: %tmp -; CHECK: p1: %tmp8 -; CHECK: p2: %tmp10 +; CHECK: p0: (8 + @bounds)<nsw> +; CHECK: p1: (4 + @bounds)<nsw> +; CHECK: p2: @bounds ; CHECK: Statements { ; CHECK: Stmt_for_body_6 ; CHECK: Domain := -; CHECK: [tmp, tmp8, tmp10] -> { Stmt_for_body_6[i0, i1, i2] : i0 >= 0 and i0 <= -1 + tmp and i1 >= 0 and i1 <= -1 + tmp8 and i2 >= 0 and i2 <= -1 + tmp10 }; +; CHECK: [p_0, p_1, bounds] -> { Stmt_for_body_6[i0, i1, i2] : i0 >= 0 and i0 <= -1 + p_0 and i1 >= 0 and i1 <= -1 + p_1 and i2 >= 0 and i2 <= -1 + bounds }; ; CHECK: Schedule := -; CHECK: [tmp, tmp8, tmp10] -> { Stmt_for_body_6[i0, i1, i2] -> [i0, i1, i2] }; +; CHECK: [p_0, p_1, bounds] -> { Stmt_for_body_6[i0, i1, i2] -> [i0, i1, i2] }; ; CHECK: ReadAccess := [Reduction Type: NONE] [Scalar: 0] -; CHECK: [tmp, tmp8, tmp10] -> { Stmt_for_body_6[i0, i1, i2] -> MemRef_data[i0, i1, i2] }; +; CHECK: [p_0, p_1, bounds] -> { Stmt_for_body_6[i0, i1, i2] -> MemRef_data[i0, i1, i2] }; ; CHECK: MustWriteAccess := [Reduction Type: NONE] [Scalar: 0] -; 
CHECK: [tmp, tmp8, tmp10] -> { Stmt_for_body_6[i0, i1, i2] -> MemRef_data[i0, i1, i2] }; +; CHECK: [p_0, p_1, bounds] -> { Stmt_for_body_6[i0, i1, i2] -> MemRef_data[i0, i1, i2] }; ; CHECK: } ; ; int bounds[3]; diff --git a/polly/test/ScopInfo/invariant_same_loop_bound_multiple_times-1.ll b/polly/test/ScopInfo/invariant_same_loop_bound_multiple_times-1.ll new file mode 100644 index 00000000000..d901575ade3 --- /dev/null +++ b/polly/test/ScopInfo/invariant_same_loop_bound_multiple_times-1.ll @@ -0,0 +1,106 @@ +; RUN: opt %loadPolly -polly-scops -analyze < %s | FileCheck %s +; +; Verify that we only have one parameter and one invariant load for all +; three loads that occure in the region but actually access the same +; location. Also check that the execution context is the most generic +; one, e.g., here the universal set. +; +; CHECK: Invariant Accesses: { +; CHECK-NEXT: ReadAccess := [Reduction Type: NONE] [Scalar: 0] +; CHECK-NEXT: MemRef_bounds[0] +; CHECK-NEXT: Execution Context: [bounds] -> { : } +; CHECK-NEXT: } +; +; CHECK: p0: @bounds +; CHECK-NOT: p1 +; CHECK: Statements { +; CHECK: Stmt_for_body_6 +; CHECK: Domain := +; CHECK: [bounds] -> { Stmt_for_body_6[i0, i1, i2] : i0 >= 0 and i0 <= -1 + bounds and i1 >= 0 and i1 <= -1 + bounds and i2 >= 0 and i2 <= -1 + bounds }; +; CHECK: Schedule := +; CHECK: [bounds] -> { Stmt_for_body_6[i0, i1, i2] -> [i0, i1, i2] }; +; CHECK: ReadAccess := [Reduction Type: NONE] [Scalar: 0] +; CHECK: [bounds] -> { Stmt_for_body_6[i0, i1, i2] -> MemRef_data[i0, i1, i2] }; +; CHECK: MustWriteAccess := [Reduction Type: NONE] [Scalar: 0] +; CHECK: [bounds] -> { Stmt_for_body_6[i0, i1, i2] -> MemRef_data[i0, i1, i2] }; +; CHECK: } +; +; int bounds[1]; +; double data[1024][1024][1024]; +; +; void foo() { +; int i, j, k; +; for (k = 0; k < bounds[0]; k++) +; for (j = 0; j < bounds[0]; j++) +; for (i = 0; i < bounds[0]; i++) +; data[k][j][i] += i + j + k; +; } +; +target datalayout = "e-m:e-i64:64-f80:128-n8:16:32:64-S128" + +@bounds = 
common global [1 x i32] zeroinitializer, align 4 +@data = common global [1024 x [1024 x [1024 x double]]] zeroinitializer, align 16 + +define void @foo() { +entry: + br label %for.cond + +for.cond: ; preds = %for.inc.16, %entry + %indvars.iv5 = phi i64 [ %indvars.iv.next6, %for.inc.16 ], [ 0, %entry ] + %tmp = load i32, i32* getelementptr inbounds ([1 x i32], [1 x i32]* @bounds, i64 0, i64 0), align 4 + %tmp7 = sext i32 %tmp to i64 + %cmp = icmp slt i64 %indvars.iv5, %tmp7 + br i1 %cmp, label %for.body, label %for.end.18 + +for.body: ; preds = %for.cond + br label %for.cond.1 + +for.cond.1: ; preds = %for.inc.13, %for.body + %indvars.iv3 = phi i64 [ %indvars.iv.next4, %for.inc.13 ], [ 0, %for.body ] + %tmp8 = load i32, i32* getelementptr inbounds ([1 x i32], [1 x i32]* @bounds, i64 0, i64 0), align 4 + %tmp9 = sext i32 %tmp8 to i64 + %cmp2 = icmp slt i64 %indvars.iv3, %tmp9 + br i1 %cmp2, label %for.body.3, label %for.end.15 + +for.body.3: ; preds = %for.cond.1 + br label %for.cond.4 + +for.cond.4: ; preds = %for.inc, %for.body.3 + %indvars.iv = phi i64 [ %indvars.iv.next, %for.inc ], [ 0, %for.body.3 ] + %tmp10 = load i32, i32* getelementptr inbounds ([1 x i32], [1 x i32]* @bounds, i64 0, i64 0), align 4 + %tmp11 = sext i32 %tmp10 to i64 + %cmp5 = icmp slt i64 %indvars.iv, %tmp11 + br i1 %cmp5, label %for.body.6, label %for.end + +for.body.6: ; preds = %for.cond.4 + %tmp12 = add nsw i64 %indvars.iv, %indvars.iv3 + %tmp13 = add nsw i64 %tmp12, %indvars.iv5 + %tmp14 = trunc i64 %tmp13 to i32 + %conv = sitofp i32 %tmp14 to double + %arrayidx11 = getelementptr inbounds [1024 x [1024 x [1024 x double]]], [1024 x [1024 x [1024 x double]]]* @data, i64 0, i64 %indvars.iv5, i64 %indvars.iv3, i64 %indvars.iv + %tmp15 = load double, double* %arrayidx11, align 8 + %add12 = fadd double %tmp15, %conv + store double %add12, double* %arrayidx11, align 8 + br label %for.inc + +for.inc: ; preds = %for.body.6 + %indvars.iv.next = add nuw nsw i64 %indvars.iv, 1 + br label %for.cond.4 
+ +for.end: ; preds = %for.cond.4 + br label %for.inc.13 + +for.inc.13: ; preds = %for.end + %indvars.iv.next4 = add nuw nsw i64 %indvars.iv3, 1 + br label %for.cond.1 + +for.end.15: ; preds = %for.cond.1 + br label %for.inc.16 + +for.inc.16: ; preds = %for.end.15 + %indvars.iv.next6 = add nuw nsw i64 %indvars.iv5, 1 + br label %for.cond + +for.end.18: ; preds = %for.cond + ret void +} diff --git a/polly/test/ScopInfo/invariant_same_loop_bound_multiple_times-2.ll b/polly/test/ScopInfo/invariant_same_loop_bound_multiple_times-2.ll new file mode 100644 index 00000000000..27b0e343109 --- /dev/null +++ b/polly/test/ScopInfo/invariant_same_loop_bound_multiple_times-2.ll @@ -0,0 +1,109 @@ +; RUN: opt %loadPolly -polly-scops -analyze < %s | FileCheck %s +; +; Verify that we only have one parameter and one invariant load for all +; three loads that occure in the region but actually access the same +; location. Also check that the execution context is the most generic +; one, e.g., here the universal set. 
+; +; CHECK: Invariant Accesses: { +; CHECK-NEXT: ReadAccess := [Reduction Type: NONE] [Scalar: 0] +; CHECK-NEXT: MemRef_bounds[0] +; CHECK-NEXT: Execution Context: [bounds, p] -> { : } +; CHECK-NEXT: } +; +; CHECK: p0: @bounds +; CHECK: p1: %p +; CHECK-NOT: p2: +; CHECK: Statements { +; CHECK: Stmt_for_body_6 +; CHECK: Domain := +; CHECK: [bounds, p] -> { Stmt_for_body_6[i0, i1, i2] : p = 0 and i0 >= 0 and i0 <= -1 + bounds and i1 >= 0 and i1 <= -1 + bounds and i2 >= 0 and i2 <= -1 + bounds }; +; CHECK: Schedule := +; CHECK: [bounds, p] -> { Stmt_for_body_6[i0, i1, i2] -> [i0, i1, i2] }; +; CHECK: ReadAccess := [Reduction Type: NONE] [Scalar: 0] +; CHECK: [bounds, p] -> { Stmt_for_body_6[i0, i1, i2] -> MemRef_data[i0, i1, i2] }; +; CHECK: MustWriteAccess := [Reduction Type: NONE] [Scalar: 0] +; CHECK: [bounds, p] -> { Stmt_for_body_6[i0, i1, i2] -> MemRef_data[i0, i1, i2] }; +; CHECK: } +; +; int bounds[1]; +; double data[1024][1024][1024]; +; +; void foo(int p) { +; int i, j, k; +; for (k = 0; k < bounds[0]; k++) +; if (p == 0) +; for (j = 0; j < bounds[0]; j++) +; for (i = 0; i < bounds[0]; i++) +; data[k][j][i] += i + j + k; +; } +; +target datalayout = "e-m:e-i64:64-f80:128-n8:16:32:64-S128" + +@bounds = common global [1 x i32] zeroinitializer, align 4 +@data = common global [1024 x [1024 x [1024 x double]]] zeroinitializer, align 16 + +define void @foo(i32 %p) { +entry: + br label %for.cond + +for.cond: ; preds = %for.inc.16, %entry + %indvars.iv5 = phi i64 [ %indvars.iv.next6, %for.inc.16 ], [ 0, %entry ] + %tmp = load i32, i32* getelementptr inbounds ([1 x i32], [1 x i32]* @bounds, i64 0, i64 0), align 4 + %tmp7 = sext i32 %tmp to i64 + %cmp = icmp slt i64 %indvars.iv5, %tmp7 + br i1 %cmp, label %for.body, label %for.end.18 + +for.body: ; preds = %for.cond + %cmpp = icmp eq i32 %p, 0 + br i1 %cmpp, label %for.cond.1, label %for.inc.16 + +for.cond.1: ; preds = %for.inc.13, %for.body + %indvars.iv3 = phi i64 [ %indvars.iv.next4, %for.inc.13 ], [ 0, %for.body 
] + %tmp8 = load i32, i32* getelementptr inbounds ([1 x i32], [1 x i32]* @bounds, i64 0, i64 0), align 4 + %tmp9 = sext i32 %tmp8 to i64 + %cmp2 = icmp slt i64 %indvars.iv3, %tmp9 + br i1 %cmp2, label %for.body.3, label %for.end.15 + +for.body.3: ; preds = %for.cond.1 + br label %for.cond.4 + +for.cond.4: ; preds = %for.inc, %for.body.3 + %indvars.iv = phi i64 [ %indvars.iv.next, %for.inc ], [ 0, %for.body.3 ] + %tmp10 = load i32, i32* getelementptr inbounds ([1 x i32], [1 x i32]* @bounds, i64 0, i64 0), align 4 + %tmp11 = sext i32 %tmp10 to i64 + %cmp5 = icmp slt i64 %indvars.iv, %tmp11 + br i1 %cmp5, label %for.body.6, label %for.end + +for.body.6: ; preds = %for.cond.4 + %tmp12 = add nsw i64 %indvars.iv, %indvars.iv3 + %tmp13 = add nsw i64 %tmp12, %indvars.iv5 + %tmp14 = trunc i64 %tmp13 to i32 + %conv = sitofp i32 %tmp14 to double + %arrayidx11 = getelementptr inbounds [1024 x [1024 x [1024 x double]]], [1024 x [1024 x [1024 x double]]]* @data, i64 0, i64 %indvars.iv5, i64 %indvars.iv3, i64 %indvars.iv + %tmp15 = load double, double* %arrayidx11, align 8 + %add12 = fadd double %tmp15, %conv + store double %add12, double* %arrayidx11, align 8 + br label %for.inc + +for.inc: ; preds = %for.body.6 + %indvars.iv.next = add nuw nsw i64 %indvars.iv, 1 + br label %for.cond.4 + +for.end: ; preds = %for.cond.4 + br label %for.inc.13 + +for.inc.13: ; preds = %for.end + %indvars.iv.next4 = add nuw nsw i64 %indvars.iv3, 1 + br label %for.cond.1 + +for.end.15: ; preds = %for.cond.1 + br label %for.inc.16 + +for.inc.16: ; preds = %for.end.15 + %indvars.iv.next6 = add nuw nsw i64 %indvars.iv5, 1 + br label %for.cond + +for.end.18: ; preds = %for.cond + ret void +} diff --git a/polly/test/ScopInfo/required-invariant-loop-bounds.ll b/polly/test/ScopInfo/required-invariant-loop-bounds.ll index 25bf1b630ee..06d15e53884 100644 --- a/polly/test/ScopInfo/required-invariant-loop-bounds.ll +++ b/polly/test/ScopInfo/required-invariant-loop-bounds.ll @@ -3,10 +3,10 @@ ; CHECK: Invariant 
Accesses: { ; CHECK-NEXT: ReadAccess := [Reduction Type: NONE] [Scalar: 0] ; CHECK-NEXT: MemRef_bounds[0] -; CHECK-NEXT: Execution Context: [tmp, tmp1] -> { : } +; CHECK-NEXT: Execution Context: [bounds, p_1] -> { : } ; CHECK-NEXT: ReadAccess := [Reduction Type: NONE] [Scalar: 0] ; CHECK-NEXT: MemRef_bounds[1] -; CHECK-NEXT: Execution Context: [tmp, tmp1] -> { : tmp >= 0 } +; CHECK-NEXT: Execution Context: [bounds, p_1] -> { : bounds >= 0 } ; CHECK: } ; double A[1000][1000]; |