summary refs log tree commit diff stats
diff options
context:
space:
mode:
-rw-r--r-- polly/lib/CodeGen/IslNodeBuilder.cpp 25
-rw-r--r-- polly/test/Isl/CodeGen/invariant_load_escaping.ll 55
-rw-r--r-- polly/test/Isl/CodeGen/two-loops-right-after-each-other-2.ll 59
3 files changed, 132 insertions, 7 deletions
diff --git a/polly/lib/CodeGen/IslNodeBuilder.cpp b/polly/lib/CodeGen/IslNodeBuilder.cpp
index 83121eed0a0..0797f1e24ff 100644
--- a/polly/lib/CodeGen/IslNodeBuilder.cpp
+++ b/polly/lib/CodeGen/IslNodeBuilder.cpp
@@ -890,6 +890,7 @@ void IslNodeBuilder::preloadInvariantLoads() {
return;
const Region &R = S.getRegion();
+ BasicBlock *EntryBB = &Builder.GetInsertBlock()->getParent()->getEntryBlock();
BasicBlock *PreLoadBB =
SplitBlock(Builder.GetInsertBlock(), Builder.GetInsertPoint(), &DT, &LI);
@@ -915,17 +916,27 @@ void IslNodeBuilder::preloadInvariantLoads() {
isl_id_free(ParamId);
}
- SmallVector<Instruction *, 4> Users;
+ auto *SAI = S.getScopArrayInfo(MA->getBaseAddr());
+ for (auto *DerivedSAI : SAI->getDerivedSAIs())
+ DerivedSAI->setBasePtr(PreloadVal);
+
+ // Use the escape system to get the correct value to users outside
+ // the SCoP.
+ BlockGenerator::EscapeUserVectorTy EscapeUsers;
for (auto *U : AccInst->users())
if (Instruction *UI = dyn_cast<Instruction>(U))
if (!R.contains(UI))
- Users.push_back(UI);
- for (auto *U : Users)
- U->replaceUsesOfWith(AccInst, PreloadVal);
+ EscapeUsers.push_back(UI);
- auto *SAI = S.getScopArrayInfo(MA->getBaseAddr());
- for (auto *DerivedSAI : SAI->getDerivedSAIs())
- DerivedSAI->setBasePtr(PreloadVal);
+ if (EscapeUsers.empty())
+ continue;
+
+ auto *Ty = AccInst->getType();
+ auto *Alloca = new AllocaInst(Ty, AccInst->getName() + ".preload.s2a");
+ Alloca->insertBefore(EntryBB->getFirstInsertionPt());
+ Builder.CreateStore(PreloadVal, Alloca);
+
+ EscapeMap[AccInst] = std::make_pair(Alloca, std::move(EscapeUsers));
}
isl_ast_build_free(Build);
diff --git a/polly/test/Isl/CodeGen/invariant_load_escaping.ll b/polly/test/Isl/CodeGen/invariant_load_escaping.ll
new file mode 100644
index 00000000000..633dda8f890
--- /dev/null
+++ b/polly/test/Isl/CodeGen/invariant_load_escaping.ll
@@ -0,0 +1,55 @@
+; RUN: opt %loadPolly -polly-codegen -polly-detect-unprofitable -S < %s | FileCheck %s
+;
+; int f(int *A, int *B) {
+; // A and B may alias. If they do not, *B is loop invariant. We assume
+; // that and hoist the load of *B, but the value returned after the loop
+; // must then come from a merged version of x.
+; int i = 0;
+; int x = 0;
+;
+; do {
+; x = *B;
+; A[i] += x;
+; } while (i++ < 100);
+;
+; return x;
+; }
+;
+; CHECK: polly.preload.begin:
+; CHECK: %polly.access.B = getelementptr i32, i32* %B, i64 0
+; CHECK: %polly.access.B.load = load i32, i32* %polly.access.B
+; CHECK: store i32 %polly.access.B.load, i32* %tmp.preload.s2a
+;
+; CHECK: polly.merge_new_and_old:
+; CHECK: %tmp.merge = phi i32 [ %tmp.final_reload, %polly.loop_exit ], [ %tmp, %do.cond ]
+; CHECK: br label %do.end
+;
+; CHECK: do.end:
+; CHECK: ret i32 %tmp.merge
+;
+; CHECK: polly.loop_exit:
+; CHECK: %tmp.final_reload = load i32, i32* %tmp.preload.s2a
+;
+target datalayout = "e-m:e-i64:64-f80:128-n8:16:32:64-S128"
+
+define i32 @f(i32* %A, i32* %B) {
+entry:
+ br label %do.body
+
+do.body: ; preds = %do.cond, %entry
+ %indvars.iv = phi i64 [ %indvars.iv.next, %do.cond ], [ 0, %entry ]
+ %tmp = load i32, i32* %B, align 4
+ %arrayidx = getelementptr inbounds i32, i32* %A, i64 %indvars.iv
+ %tmp1 = load i32, i32* %arrayidx, align 4
+ %add = add nsw i32 %tmp1, %tmp
+ store i32 %add, i32* %arrayidx, align 4
+ br label %do.cond
+
+do.cond: ; preds = %do.body
+ %indvars.iv.next = add nuw nsw i64 %indvars.iv, 1
+ %exitcond = icmp ne i64 %indvars.iv.next, 101
+ br i1 %exitcond, label %do.body, label %do.end
+
+do.end: ; preds = %do.cond
+ ret i32 %tmp
+}
diff --git a/polly/test/Isl/CodeGen/two-loops-right-after-each-other-2.ll b/polly/test/Isl/CodeGen/two-loops-right-after-each-other-2.ll
new file mode 100644
index 00000000000..0f52b94a304
--- /dev/null
+++ b/polly/test/Isl/CodeGen/two-loops-right-after-each-other-2.ll
@@ -0,0 +1,59 @@
+; RUN: opt %loadPolly -polly-detect-unprofitable -polly-codegen -S < %s | FileCheck %s
+
+; CHECK: polly.merge_new_and_old:
+; CHECK-NEXT: merge = phi
+
+%struct.ImageParameters = type { i32, i32, i32, i32, i32, i32, i32, i32, i32, i32, i32, i32, float, i32, i32, i32, i32, i32, i32, i32, i32, i32, i32, i32, i32, i32, i32, i32, i32, i32, i32, i8**, i8**, i32, i32***, i32, i32, i32, i32, i32, i32, i32, i32, i32, i32, i32, i32, [9 x [16 x [16 x i16]]], [5 x [16 x [16 x i16]]], [9 x [8 x [8 x i16]]], [2 x [4 x [16 x [16 x i16]]]], [16 x [16 x i16]], [16 x [16 x i32]], i32****, i32***, i32***, i32***, i32****, i32****, %struct.Picture*, %struct.Slice*, %struct.macroblock*, i32*, i32*, i32, i32, i32, i32, [4 x [4 x i32]], i32, i32, i32, i32, i32, double, i32, i32, i32, i32, i16******, i16******, i16******, i16******, [15 x i16], i32, i32, i32, i32, i32, i32, i32, i32, [6 x [32 x i32]], i32, i32, i32, i32, i32, i32, i32, i32, i32, i32, i32, i32, i32, [1 x i32], i32, i32, [2 x i32], i32, i32, i32, i32, i32, i32, i32, i32, i32, i32, i32, i32, i32, i32, i32, i32, %struct.DecRefPicMarking*, i32, i32, i32, i32, i32, i32, i32, i32, i32, i32, i32, i32, i32, i32, i32, i32, i32, i32, i32, i32, i32, i32, i32, double**, double***, i32***, double**, i32, i32, i32, i32, i32, i32, i32, i32, i32, i32, [3 x [2 x i32]], [2 x i32], i32, i32, i16, i32, i32, i32, i32, i32 }
+%struct.Picture = type { i32, i32, [100 x %struct.Slice*], i32, float, float, float }
+%struct.Slice = type { i32, i32, i32, i32, i32, i32, %struct.datapartition*, %struct.MotionInfoContexts*, %struct.TextureInfoContexts*, i32, i32*, i32*, i32*, i32, i32*, i32*, i32*, i32 (i32)*, [3 x [2 x i32]] }
+%struct.datapartition = type { %struct.Bitstream*, %struct.EncodingEnvironment, %struct.EncodingEnvironment }
+%struct.Bitstream = type { i32, i32, i8, i32, i32, i8, i8, i32, i32, i8*, i32 }
+%struct.EncodingEnvironment = type { i32, i32, i32, i32, i32, i8*, i32*, i32, i32 }
+%struct.MotionInfoContexts = type { [3 x [11 x %struct.BiContextType]], [2 x [9 x %struct.BiContextType]], [2 x [10 x %struct.BiContextType]], [2 x [6 x %struct.BiContextType]], [4 x %struct.BiContextType], [4 x %struct.BiContextType], [3 x %struct.BiContextType] }
+%struct.BiContextType = type { i16, i8, i64 }
+%struct.TextureInfoContexts = type { [2 x %struct.BiContextType], [3 x [4 x %struct.BiContextType]], [10 x [4 x %struct.BiContextType]], [10 x [15 x %struct.BiContextType]], [10 x [15 x %struct.BiContextType]], [10 x [5 x %struct.BiContextType]], [10 x [5 x %struct.BiContextType]], [10 x [15 x %struct.BiContextType]], [10 x [15 x %struct.BiContextType]] }
+%struct.macroblock = type { i32, i32, i32, [2 x i32], i32, [8 x i32], %struct.macroblock*, i32, [2 x [4 x [4 x [2 x i32]]]], [16 x i8], [16 x i8], i32, i64, [4 x i32], [4 x i32], i64, i32, i32, i32, i32, i32, i32, i32, i32, i32, i32, i32, i32, i32, i32, i32, i32, i16, double, i32, i32, i32, i32, i32, i32, i32, i32, i32 }
+%struct.DecRefPicMarking = type { i32, i32, i32, i32, i32, %struct.DecRefPicMarking* }
+
+@img = external global %struct.ImageParameters*, align 8
+
+define void @intrapred_luma() {
+entry:
+ %PredPel = alloca [13 x i16], align 16
+ br label %for.body
+
+for.body: ; preds = %for.body, %entry
+ br i1 undef, label %for.body, label %for.body.262
+
+for.body.262: ; preds = %for.body
+ %0 = load %struct.ImageParameters*, %struct.ImageParameters** @img, align 8
+ br label %for.body.280
+
+for.body.280: ; preds = %for.body.280, %for.body.262
+ %indvars.iv66 = phi i64 [ 0, %for.body.262 ], [ %indvars.iv.next67, %for.body.280 ]
+ %arrayidx282 = getelementptr inbounds [13 x i16], [13 x i16]* %PredPel, i64 0, i64 1
+ %arrayidx283 = getelementptr inbounds i16, i16* %arrayidx282, i64 %indvars.iv66
+ %1 = load i16, i16* %arrayidx283, align 2
+ %arrayidx289 = getelementptr inbounds %struct.ImageParameters, %struct.ImageParameters* %0, i64 0, i32 47, i64 0, i64 2, i64 %indvars.iv66
+ store i16 %1, i16* %arrayidx289, align 2
+ %indvars.iv.next67 = add nuw nsw i64 %indvars.iv66, 1
+ br i1 false, label %for.body.280, label %for.end.298
+
+for.end.298: ; preds = %for.body.280
+ %2 = load %struct.ImageParameters*, %struct.ImageParameters** @img, align 8
+ br label %for.body.310
+
+for.body.310: ; preds = %for.body.310, %for.end.298
+ %indvars.iv = phi i64 [ 0, %for.end.298 ], [ %indvars.iv.next, %for.body.310 ]
+ %InterScopSext = sext i16 %1 to i64
+ %arrayidx312 = getelementptr inbounds [13 x i16], [13 x i16]* %PredPel, i64 0, i64 %InterScopSext
+ %arrayidx313 = getelementptr inbounds i16, i16* %arrayidx312, i64 %indvars.iv
+ %3 = load i16, i16* %arrayidx313, align 2
+ %arrayidx322 = getelementptr inbounds %struct.ImageParameters, %struct.ImageParameters* %2, i64 0, i32 47, i64 1, i64 %indvars.iv, i64 1
+ store i16 %3, i16* %arrayidx322, align 2
+ %indvars.iv.next = add nuw nsw i64 %indvars.iv, 1
+ br i1 false, label %for.body.310, label %for.end.328
+
+for.end.328: ; preds = %for.body.310
+ ret void
+}
OpenPOWER on IntegriCloud