diff options
-rw-r--r-- | polly/include/polly/CodeGen/IslNodeBuilder.h | 2 | ||||
-rw-r--r-- | polly/include/polly/ScopInfo.h | 4 | ||||
-rw-r--r-- | polly/lib/Analysis/ScopInfo.cpp | 2 | ||||
-rw-r--r-- | polly/lib/CodeGen/IslNodeBuilder.cpp | 22 | ||||
-rw-r--r-- | polly/test/Isl/CodeGen/invariant_load_base_pointer_conditional_2.ll | 73 |
5 files changed, 92 insertions, 11 deletions
diff --git a/polly/include/polly/CodeGen/IslNodeBuilder.h b/polly/include/polly/CodeGen/IslNodeBuilder.h index cefc2927ce1..55356039d9c 100644 --- a/polly/include/polly/CodeGen/IslNodeBuilder.h +++ b/polly/include/polly/CodeGen/IslNodeBuilder.h @@ -247,7 +247,7 @@ protected: /// to the required type. /// /// @returns False, iff a problem occured and the load was not preloaded. - bool preloadInvariantEquivClass(const InvariantEquivClassTy &IAClass); + bool preloadInvariantEquivClass(InvariantEquivClassTy &IAClass); void createForVector(__isl_take isl_ast_node *For, int VectorWidth); void createForSequential(__isl_take isl_ast_node *For, bool KnownParallel); diff --git a/polly/include/polly/ScopInfo.h b/polly/include/polly/ScopInfo.h index 0f440ef3df6..58421b247fd 100644 --- a/polly/include/polly/ScopInfo.h +++ b/polly/include/polly/ScopInfo.h @@ -1895,10 +1895,10 @@ public: inline unsigned getMaxLoopDepth() const { return MaxLoopDepth; } /// @brief Return the invariant equivalence class for @p Val if any. - const InvariantEquivClassTy *lookupInvariantEquivClass(Value *Val) const; + InvariantEquivClassTy *lookupInvariantEquivClass(Value *Val); /// @brief Return the set of invariant accesses. 
- const InvariantEquivClassesTy &getInvariantAccesses() const { + InvariantEquivClassesTy &getInvariantAccesses() { return InvariantEquivClasses; } diff --git a/polly/lib/Analysis/ScopInfo.cpp b/polly/lib/Analysis/ScopInfo.cpp index 014919634ed..20084cd0623 100644 --- a/polly/lib/Analysis/ScopInfo.cpp +++ b/polly/lib/Analysis/ScopInfo.cpp @@ -3221,7 +3221,7 @@ void Scop::simplifySCoP(bool RemoveIgnoredStmts, DominatorTree &DT, } } -const InvariantEquivClassTy *Scop::lookupInvariantEquivClass(Value *Val) const { +InvariantEquivClassTy *Scop::lookupInvariantEquivClass(Value *Val) { LoadInst *LInst = dyn_cast<LoadInst>(Val); if (!LInst) return nullptr; diff --git a/polly/lib/CodeGen/IslNodeBuilder.cpp b/polly/lib/CodeGen/IslNodeBuilder.cpp index 9e078114e6e..b4787893443 100644 --- a/polly/lib/CodeGen/IslNodeBuilder.cpp +++ b/polly/lib/CodeGen/IslNodeBuilder.cpp @@ -888,7 +888,7 @@ bool IslNodeBuilder::materializeValue(isl_id *Id) { } } - if (const auto *IAClass = S.lookupInvariantEquivClass(Val)) { + if (auto *IAClass = S.lookupInvariantEquivClass(Val)) { // Check if this invariant access class is empty, hence if we never // actually added a loads instruction to it. In that case it has no @@ -1035,7 +1035,7 @@ Value *IslNodeBuilder::preloadInvariantLoad(const MemoryAccess &MA, } bool IslNodeBuilder::preloadInvariantEquivClass( - const InvariantEquivClassTy &IAClass) { + InvariantEquivClassTy &IAClass) { // For an equivalence class of invariant loads we pre-load the representing // element with the unified execution context. However, we have to map all // elements of the class to the one preloaded load as they are referenced @@ -1059,18 +1059,26 @@ bool IslNodeBuilder::preloadInvariantEquivClass( if (!PreloadedPtrs.insert(PtrId).second) return false; + // The execution context of the IAClass. + isl_set *&ExecutionCtx = std::get<2>(IAClass); + // If the base pointer of this class is dependent on another one we have to // make sure it was preloaded already. 
auto *SAI = MA->getScopArrayInfo(); - if (const auto *BaseIAClass = S.lookupInvariantEquivClass(SAI->getBasePtr())) + if (auto *BaseIAClass = S.lookupInvariantEquivClass(SAI->getBasePtr())) { if (!preloadInvariantEquivClass(*BaseIAClass)) return false; + // After we preloaded the BaseIAClass we adjusted the BaseExecutionCtx and + // we need to refine the ExecutionCtx. + isl_set *BaseExecutionCtx = isl_set_copy(std::get<2>(*BaseIAClass)); + ExecutionCtx = isl_set_intersect(ExecutionCtx, BaseExecutionCtx); + } + Instruction *AccInst = MA->getAccessInstruction(); Type *AccInstTy = AccInst->getType(); - isl_set *Domain = isl_set_copy(std::get<2>(IAClass)); - Value *PreloadVal = preloadInvariantLoad(*MA, Domain); + Value *PreloadVal = preloadInvariantLoad(*MA, isl_set_copy(ExecutionCtx)); if (!PreloadVal) return false; @@ -1138,7 +1146,7 @@ bool IslNodeBuilder::preloadInvariantEquivClass( bool IslNodeBuilder::preloadInvariantLoads() { - const auto &InvariantEquivClasses = S.getInvariantAccesses(); + auto &InvariantEquivClasses = S.getInvariantAccesses(); if (InvariantEquivClasses.empty()) return true; @@ -1147,7 +1155,7 @@ bool IslNodeBuilder::preloadInvariantLoads() { PreLoadBB->setName("polly.preload.begin"); Builder.SetInsertPoint(&PreLoadBB->front()); - for (const auto &IAClass : InvariantEquivClasses) + for (auto &IAClass : InvariantEquivClasses) if (!preloadInvariantEquivClass(IAClass)) return false; diff --git a/polly/test/Isl/CodeGen/invariant_load_base_pointer_conditional_2.ll b/polly/test/Isl/CodeGen/invariant_load_base_pointer_conditional_2.ll new file mode 100644 index 00000000000..cc93187d9f5 --- /dev/null +++ b/polly/test/Isl/CodeGen/invariant_load_base_pointer_conditional_2.ll @@ -0,0 +1,73 @@ +; RUN: opt %loadPolly -analyze -polly-scops < %s | FileCheck %s +; RUN: opt %loadPolly -S -polly-codegen < %s | FileCheck %s --check-prefix=IR +; +; As (p + q) can overflow we have to check that we load from +; I[p + q] only if it does not. 
+; +; CHECK: Invariant Accesses: { +; CHECK-NEXT: ReadAccess := [Reduction Type: NONE] [Scalar: 0] +; CHECK-NEXT: [N, p, q] -> { Stmt_for_body[i0] -> MemRef_I[p + q] }; +; CHECK-NEXT: Execution Context: [N, p, q] -> { : N > 0 and -2147483648 - p <= q <= 2147483647 - p } +; CHECK-NEXT: ReadAccess := [Reduction Type: NONE] [Scalar: 0] +; CHECK-NEXT: [N, p, q] -> { Stmt_for_body[i0] -> MemRef_tmp1[0] }; +; CHECK-NEXT: Execution Context: [N, p, q] -> { : N > 0 } +; CHECK-NEXT: } +; +; IR: polly.preload.merge: +; IR-NEXT: %polly.preload.tmp1.merge = phi i32* [ %polly.access.I.load, %polly.preload.exec ], [ null, %polly.preload.cond ] +; IR-NEXT: store i32* %polly.preload.tmp1.merge, i32** %tmp1.preload.s2a +; IR-NEXT: %12 = sext i32 %N to i64 +; IR-NEXT: %13 = icmp sge i64 %12, 1 +; IR-NEXT: %14 = sext i32 %q to i64 +; IR-NEXT: %15 = sext i32 %p to i64 +; IR-NEXT: %16 = add nsw i64 %15, %14 +; IR-NEXT: %17 = icmp sle i64 %16, 2147483647 +; IR-NEXT: %18 = and i1 %13, %17 +; IR-NEXT: %19 = sext i32 %q to i64 +; IR-NEXT: %20 = sext i32 %p to i64 +; IR-NEXT: %21 = add nsw i64 %20, %19 +; IR-NEXT: %22 = icmp sge i64 %21, -2147483648 +; IR-NEXT: %23 = and i1 %18, %22 +; IR-NEXT: br label %polly.preload.cond1 +; +; IR: polly.preload.cond1: +; IR-NEXT: br i1 %23 +; +; IR: polly.preload.exec3: +; IR-NEXT: %polly.access.polly.preload.tmp1.merge = getelementptr i32, i32* %polly.preload.tmp1.merge, i64 0 +; IR-NEXT: %polly.access.polly.preload.tmp1.merge.load = load i32, i32* %polly.access.polly.preload.tmp1.merge, align 4 +; +; void f(int **I, int *A, int N, int p, int q) { +; for (int i = 0; i < N; i++) +; A[i] = *(I[p + q]); +; } +; +target datalayout = "e-m:e-i64:64-f80:128-n8:16:32:64-S128" + +define void @f(i32** %I, i32* %A, i32 %N, i32 %p, i32 %q) { +entry: + %tmp = sext i32 %N to i64 + br label %for.cond + +for.cond: ; preds = %for.inc, %entry + %indvars.iv = phi i64 [ %indvars.iv.next, %for.inc ], [ 0, %entry ] + %cmp = icmp slt i64 %indvars.iv, %tmp + br i1 %cmp, label 
%for.body, label %for.end + +for.body: ; preds = %for.cond + %add = add i32 %p, %q + %idxprom = sext i32 %add to i64 + %arrayidx = getelementptr inbounds i32*, i32** %I, i64 %idxprom + %tmp1 = load i32*, i32** %arrayidx, align 8 + %tmp2 = load i32, i32* %tmp1, align 4 + %arrayidx2 = getelementptr inbounds i32, i32* %A, i64 %indvars.iv + store i32 %tmp2, i32* %arrayidx2, align 4 + br label %for.inc + +for.inc: ; preds = %for.body + %indvars.iv.next = add nuw nsw i64 %indvars.iv, 1 + br label %for.cond + +for.end: ; preds = %for.cond + ret void +} |