summaryrefslogtreecommitdiffstats
diff options
context:
space:
mode:
-rw-r--r--polly/include/polly/CodeGen/IslNodeBuilder.h2
-rw-r--r--polly/include/polly/ScopInfo.h4
-rw-r--r--polly/lib/Analysis/ScopInfo.cpp2
-rw-r--r--polly/lib/CodeGen/IslNodeBuilder.cpp22
-rw-r--r--polly/test/Isl/CodeGen/invariant_load_base_pointer_conditional_2.ll73
5 files changed, 92 insertions, 11 deletions
diff --git a/polly/include/polly/CodeGen/IslNodeBuilder.h b/polly/include/polly/CodeGen/IslNodeBuilder.h
index cefc2927ce1..55356039d9c 100644
--- a/polly/include/polly/CodeGen/IslNodeBuilder.h
+++ b/polly/include/polly/CodeGen/IslNodeBuilder.h
@@ -247,7 +247,7 @@ protected:
/// to the required type.
///
/// @returns False, iff a problem occured and the load was not preloaded.
- bool preloadInvariantEquivClass(const InvariantEquivClassTy &IAClass);
+ bool preloadInvariantEquivClass(InvariantEquivClassTy &IAClass);
void createForVector(__isl_take isl_ast_node *For, int VectorWidth);
void createForSequential(__isl_take isl_ast_node *For, bool KnownParallel);
diff --git a/polly/include/polly/ScopInfo.h b/polly/include/polly/ScopInfo.h
index 0f440ef3df6..58421b247fd 100644
--- a/polly/include/polly/ScopInfo.h
+++ b/polly/include/polly/ScopInfo.h
@@ -1895,10 +1895,10 @@ public:
inline unsigned getMaxLoopDepth() const { return MaxLoopDepth; }
/// @brief Return the invariant equivalence class for @p Val if any.
- const InvariantEquivClassTy *lookupInvariantEquivClass(Value *Val) const;
+ InvariantEquivClassTy *lookupInvariantEquivClass(Value *Val);
/// @brief Return the set of invariant accesses.
- const InvariantEquivClassesTy &getInvariantAccesses() const {
+ InvariantEquivClassesTy &getInvariantAccesses() {
return InvariantEquivClasses;
}
diff --git a/polly/lib/Analysis/ScopInfo.cpp b/polly/lib/Analysis/ScopInfo.cpp
index 014919634ed..20084cd0623 100644
--- a/polly/lib/Analysis/ScopInfo.cpp
+++ b/polly/lib/Analysis/ScopInfo.cpp
@@ -3221,7 +3221,7 @@ void Scop::simplifySCoP(bool RemoveIgnoredStmts, DominatorTree &DT,
}
}
-const InvariantEquivClassTy *Scop::lookupInvariantEquivClass(Value *Val) const {
+InvariantEquivClassTy *Scop::lookupInvariantEquivClass(Value *Val) {
LoadInst *LInst = dyn_cast<LoadInst>(Val);
if (!LInst)
return nullptr;
diff --git a/polly/lib/CodeGen/IslNodeBuilder.cpp b/polly/lib/CodeGen/IslNodeBuilder.cpp
index 9e078114e6e..b4787893443 100644
--- a/polly/lib/CodeGen/IslNodeBuilder.cpp
+++ b/polly/lib/CodeGen/IslNodeBuilder.cpp
@@ -888,7 +888,7 @@ bool IslNodeBuilder::materializeValue(isl_id *Id) {
}
}
- if (const auto *IAClass = S.lookupInvariantEquivClass(Val)) {
+ if (auto *IAClass = S.lookupInvariantEquivClass(Val)) {
// Check if this invariant access class is empty, hence if we never
// actually added a loads instruction to it. In that case it has no
@@ -1035,7 +1035,7 @@ Value *IslNodeBuilder::preloadInvariantLoad(const MemoryAccess &MA,
}
bool IslNodeBuilder::preloadInvariantEquivClass(
- const InvariantEquivClassTy &IAClass) {
+ InvariantEquivClassTy &IAClass) {
// For an equivalence class of invariant loads we pre-load the representing
// element with the unified execution context. However, we have to map all
// elements of the class to the one preloaded load as they are referenced
@@ -1059,18 +1059,26 @@ bool IslNodeBuilder::preloadInvariantEquivClass(
if (!PreloadedPtrs.insert(PtrId).second)
return false;
+ // The execution context of the IAClass.
+ isl_set *&ExecutionCtx = std::get<2>(IAClass);
+
// If the base pointer of this class is dependent on another one we have to
// make sure it was preloaded already.
auto *SAI = MA->getScopArrayInfo();
- if (const auto *BaseIAClass = S.lookupInvariantEquivClass(SAI->getBasePtr()))
+ if (auto *BaseIAClass = S.lookupInvariantEquivClass(SAI->getBasePtr())) {
if (!preloadInvariantEquivClass(*BaseIAClass))
return false;
+ // Preloading the BaseIAClass may have adjusted the BaseExecutionCtx, so
+ // we need to refine the ExecutionCtx by intersecting it with that context.
+ isl_set *BaseExecutionCtx = isl_set_copy(std::get<2>(*BaseIAClass));
+ ExecutionCtx = isl_set_intersect(ExecutionCtx, BaseExecutionCtx);
+ }
+
Instruction *AccInst = MA->getAccessInstruction();
Type *AccInstTy = AccInst->getType();
- isl_set *Domain = isl_set_copy(std::get<2>(IAClass));
- Value *PreloadVal = preloadInvariantLoad(*MA, Domain);
+ Value *PreloadVal = preloadInvariantLoad(*MA, isl_set_copy(ExecutionCtx));
if (!PreloadVal)
return false;
@@ -1138,7 +1146,7 @@ bool IslNodeBuilder::preloadInvariantEquivClass(
bool IslNodeBuilder::preloadInvariantLoads() {
- const auto &InvariantEquivClasses = S.getInvariantAccesses();
+ auto &InvariantEquivClasses = S.getInvariantAccesses();
if (InvariantEquivClasses.empty())
return true;
@@ -1147,7 +1155,7 @@ bool IslNodeBuilder::preloadInvariantLoads() {
PreLoadBB->setName("polly.preload.begin");
Builder.SetInsertPoint(&PreLoadBB->front());
- for (const auto &IAClass : InvariantEquivClasses)
+ for (auto &IAClass : InvariantEquivClasses)
if (!preloadInvariantEquivClass(IAClass))
return false;
diff --git a/polly/test/Isl/CodeGen/invariant_load_base_pointer_conditional_2.ll b/polly/test/Isl/CodeGen/invariant_load_base_pointer_conditional_2.ll
new file mode 100644
index 00000000000..cc93187d9f5
--- /dev/null
+++ b/polly/test/Isl/CodeGen/invariant_load_base_pointer_conditional_2.ll
@@ -0,0 +1,73 @@
+; RUN: opt %loadPolly -analyze -polly-scops < %s | FileCheck %s
+; RUN: opt %loadPolly -S -polly-codegen < %s | FileCheck %s --check-prefix=IR
+;
+; As (p + q) can overflow we have to check that we load from
+; I[p + q] only if it does not.
+;
+; CHECK: Invariant Accesses: {
+; CHECK-NEXT: ReadAccess := [Reduction Type: NONE] [Scalar: 0]
+; CHECK-NEXT: [N, p, q] -> { Stmt_for_body[i0] -> MemRef_I[p + q] };
+; CHECK-NEXT: Execution Context: [N, p, q] -> { : N > 0 and -2147483648 - p <= q <= 2147483647 - p }
+; CHECK-NEXT: ReadAccess := [Reduction Type: NONE] [Scalar: 0]
+; CHECK-NEXT: [N, p, q] -> { Stmt_for_body[i0] -> MemRef_tmp1[0] };
+; CHECK-NEXT: Execution Context: [N, p, q] -> { : N > 0 }
+; CHECK-NEXT: }
+;
+; IR: polly.preload.merge:
+; IR-NEXT: %polly.preload.tmp1.merge = phi i32* [ %polly.access.I.load, %polly.preload.exec ], [ null, %polly.preload.cond ]
+; IR-NEXT: store i32* %polly.preload.tmp1.merge, i32** %tmp1.preload.s2a
+; IR-NEXT: %12 = sext i32 %N to i64
+; IR-NEXT: %13 = icmp sge i64 %12, 1
+; IR-NEXT: %14 = sext i32 %q to i64
+; IR-NEXT: %15 = sext i32 %p to i64
+; IR-NEXT: %16 = add nsw i64 %15, %14
+; IR-NEXT: %17 = icmp sle i64 %16, 2147483647
+; IR-NEXT: %18 = and i1 %13, %17
+; IR-NEXT: %19 = sext i32 %q to i64
+; IR-NEXT: %20 = sext i32 %p to i64
+; IR-NEXT: %21 = add nsw i64 %20, %19
+; IR-NEXT: %22 = icmp sge i64 %21, -2147483648
+; IR-NEXT: %23 = and i1 %18, %22
+; IR-NEXT: br label %polly.preload.cond1
+;
+; IR: polly.preload.cond1:
+; IR-NEXT: br i1 %23
+;
+; IR: polly.preload.exec3:
+; IR-NEXT: %polly.access.polly.preload.tmp1.merge = getelementptr i32, i32* %polly.preload.tmp1.merge, i64 0
+; IR-NEXT: %polly.access.polly.preload.tmp1.merge.load = load i32, i32* %polly.access.polly.preload.tmp1.merge, align 4
+;
+; void f(int **I, int *A, int N, int p, int q) {
+; for (int i = 0; i < N; i++)
+; A[i] = *(I[p + q]);
+; }
+;
+target datalayout = "e-m:e-i64:64-f80:128-n8:16:32:64-S128"
+
+define void @f(i32** %I, i32* %A, i32 %N, i32 %p, i32 %q) {
+entry:
+ %tmp = sext i32 %N to i64
+ br label %for.cond
+
+for.cond: ; preds = %for.inc, %entry
+ %indvars.iv = phi i64 [ %indvars.iv.next, %for.inc ], [ 0, %entry ]
+ %cmp = icmp slt i64 %indvars.iv, %tmp
+ br i1 %cmp, label %for.body, label %for.end
+
+for.body: ; preds = %for.cond
+ %add = add i32 %p, %q
+ %idxprom = sext i32 %add to i64
+ %arrayidx = getelementptr inbounds i32*, i32** %I, i64 %idxprom
+ %tmp1 = load i32*, i32** %arrayidx, align 8
+ %tmp2 = load i32, i32* %tmp1, align 4
+ %arrayidx2 = getelementptr inbounds i32, i32* %A, i64 %indvars.iv
+ store i32 %tmp2, i32* %arrayidx2, align 4
+ br label %for.inc
+
+for.inc: ; preds = %for.body
+ %indvars.iv.next = add nuw nsw i64 %indvars.iv, 1
+ br label %for.cond
+
+for.end: ; preds = %for.cond
+ ret void
+}
OpenPOWER on IntegriCloud