summaryrefslogtreecommitdiffstats
diff options
context:
space:
mode:
authorJohannes Doerfert <doerfert@cs.uni-saarland.de>2015-09-30 09:43:20 +0000
committerJohannes Doerfert <doerfert@cs.uni-saarland.de>2015-09-30 09:43:20 +0000
commitef19ead20efe75b8d9b6d3f61fe2a1ab8eabff5f (patch)
tree7884f5e2ae48e94309f829a046911c605f494075
parenta176421da504153dcf7eec3cca9cefe10464ef49 (diff)
downloadbcm5719-llvm-ef19ead20efe75b8d9b6d3f61fe2a1ab8eabff5f.tar.gz
bcm5719-llvm-ef19ead20efe75b8d9b6d3f61fe2a1ab8eabff5f.zip
[FIX] Use escape logic for invariant loads
Previously, we unconditionally forced all users outside the SCoP to use the preloaded value. However, if the SCoP is not executed due to the runtime checks, we need to use the original value because it might not be invariant in the first place. llvm-svn: 248881
-rw-r--r--polly/lib/CodeGen/IslNodeBuilder.cpp25
-rw-r--r--polly/test/Isl/CodeGen/invariant_load_escaping.ll55
-rw-r--r--polly/test/Isl/CodeGen/two-loops-right-after-each-other-2.ll59
3 files changed, 132 insertions, 7 deletions
diff --git a/polly/lib/CodeGen/IslNodeBuilder.cpp b/polly/lib/CodeGen/IslNodeBuilder.cpp
index 83121eed0a0..0797f1e24ff 100644
--- a/polly/lib/CodeGen/IslNodeBuilder.cpp
+++ b/polly/lib/CodeGen/IslNodeBuilder.cpp
@@ -890,6 +890,7 @@ void IslNodeBuilder::preloadInvariantLoads() {
return;
const Region &R = S.getRegion();
+ BasicBlock *EntryBB = &Builder.GetInsertBlock()->getParent()->getEntryBlock();
BasicBlock *PreLoadBB =
SplitBlock(Builder.GetInsertBlock(), Builder.GetInsertPoint(), &DT, &LI);
@@ -915,17 +916,27 @@ void IslNodeBuilder::preloadInvariantLoads() {
isl_id_free(ParamId);
}
- SmallVector<Instruction *, 4> Users;
+ auto *SAI = S.getScopArrayInfo(MA->getBaseAddr());
+ for (auto *DerivedSAI : SAI->getDerivedSAIs())
+ DerivedSAI->setBasePtr(PreloadVal);
+
+ // Use the escape system to get the correct value to users outside
+ // the SCoP.
+ BlockGenerator::EscapeUserVectorTy EscapeUsers;
for (auto *U : AccInst->users())
if (Instruction *UI = dyn_cast<Instruction>(U))
if (!R.contains(UI))
- Users.push_back(UI);
- for (auto *U : Users)
- U->replaceUsesOfWith(AccInst, PreloadVal);
+ EscapeUsers.push_back(UI);
- auto *SAI = S.getScopArrayInfo(MA->getBaseAddr());
- for (auto *DerivedSAI : SAI->getDerivedSAIs())
- DerivedSAI->setBasePtr(PreloadVal);
+ if (EscapeUsers.empty())
+ continue;
+
+ auto *Ty = AccInst->getType();
+ auto *Alloca = new AllocaInst(Ty, AccInst->getName() + ".preload.s2a");
+ Alloca->insertBefore(EntryBB->getFirstInsertionPt());
+ Builder.CreateStore(PreloadVal, Alloca);
+
+ EscapeMap[AccInst] = std::make_pair(Alloca, std::move(EscapeUsers));
}
isl_ast_build_free(Build);
diff --git a/polly/test/Isl/CodeGen/invariant_load_escaping.ll b/polly/test/Isl/CodeGen/invariant_load_escaping.ll
new file mode 100644
index 00000000000..633dda8f890
--- /dev/null
+++ b/polly/test/Isl/CodeGen/invariant_load_escaping.ll
@@ -0,0 +1,55 @@
+; RUN: opt %loadPolly -polly-codegen -polly-detect-unprofitable -S < %s | FileCheck %s
+;
+; int f(int *A, int *B) {
+; // Possible aliasing between A and B but if not then *B would be
+; // invariant. We assume this and hoist *B but need to use a merged
+; // version in the return.
+; int i = 0;
+; int x = 0;
+;
+; do {
+; x = *B;
+; A[i] += x;
+; } while (i++ < 100);
+;
+; return x;
+; }
+;
+; CHECK: polly.preload.begin:
+; CHECK: %polly.access.B = getelementptr i32, i32* %B, i64 0
+; CHECK: %polly.access.B.load = load i32, i32* %polly.access.B
+; CHECK: store i32 %polly.access.B.load, i32* %tmp.preload.s2a
+;
+; CHECK: polly.merge_new_and_old:
+; CHECK: %tmp.merge = phi i32 [ %tmp.final_reload, %polly.loop_exit ], [ %tmp, %do.cond ]
+; CHECK: br label %do.end
+;
+; CHECK: do.end:
+; CHECK: ret i32 %tmp.merge
+;
+; CHECK: polly.loop_exit:
+; CHECK: %tmp.final_reload = load i32, i32* %tmp.preload.s2a
+;
+target datalayout = "e-m:e-i64:64-f80:128-n8:16:32:64-S128"
+
+define i32 @f(i32* %A, i32* %B) {
+entry:
+ br label %do.body
+
+do.body: ; preds = %do.cond, %entry
+ %indvars.iv = phi i64 [ %indvars.iv.next, %do.cond ], [ 0, %entry ]
+ %tmp = load i32, i32* %B, align 4
+ %arrayidx = getelementptr inbounds i32, i32* %A, i64 %indvars.iv
+ %tmp1 = load i32, i32* %arrayidx, align 4
+ %add = add nsw i32 %tmp1, %tmp
+ store i32 %add, i32* %arrayidx, align 4
+ br label %do.cond
+
+do.cond: ; preds = %do.body
+ %indvars.iv.next = add nuw nsw i64 %indvars.iv, 1
+ %exitcond = icmp ne i64 %indvars.iv.next, 101
+ br i1 %exitcond, label %do.body, label %do.end
+
+do.end: ; preds = %do.cond
+ ret i32 %tmp
+}
diff --git a/polly/test/Isl/CodeGen/two-loops-right-after-each-other-2.ll b/polly/test/Isl/CodeGen/two-loops-right-after-each-other-2.ll
new file mode 100644
index 00000000000..0f52b94a304
--- /dev/null
+++ b/polly/test/Isl/CodeGen/two-loops-right-after-each-other-2.ll
@@ -0,0 +1,59 @@
+; RUN: opt %loadPolly -polly-detect-unprofitable -polly-codegen -S < %s | FileCheck %s
+
+; CHECK: polly.merge_new_and_old:
+; CHECK-NEXT: merge = phi
+
+%struct.ImageParameters = type { i32, i32, i32, i32, i32, i32, i32, i32, i32, i32, i32, i32, float, i32, i32, i32, i32, i32, i32, i32, i32, i32, i32, i32, i32, i32, i32, i32, i32, i32, i32, i8**, i8**, i32, i32***, i32, i32, i32, i32, i32, i32, i32, i32, i32, i32, i32, i32, [9 x [16 x [16 x i16]]], [5 x [16 x [16 x i16]]], [9 x [8 x [8 x i16]]], [2 x [4 x [16 x [16 x i16]]]], [16 x [16 x i16]], [16 x [16 x i32]], i32****, i32***, i32***, i32***, i32****, i32****, %struct.Picture*, %struct.Slice*, %struct.macroblock*, i32*, i32*, i32, i32, i32, i32, [4 x [4 x i32]], i32, i32, i32, i32, i32, double, i32, i32, i32, i32, i16******, i16******, i16******, i16******, [15 x i16], i32, i32, i32, i32, i32, i32, i32, i32, [6 x [32 x i32]], i32, i32, i32, i32, i32, i32, i32, i32, i32, i32, i32, i32, i32, [1 x i32], i32, i32, [2 x i32], i32, i32, i32, i32, i32, i32, i32, i32, i32, i32, i32, i32, i32, i32, i32, i32, %struct.DecRefPicMarking*, i32, i32, i32, i32, i32, i32, i32, i32, i32, i32, i32, i32, i32, i32, i32, i32, i32, i32, i32, i32, i32, i32, i32, double**, double***, i32***, double**, i32, i32, i32, i32, i32, i32, i32, i32, i32, i32, [3 x [2 x i32]], [2 x i32], i32, i32, i16, i32, i32, i32, i32, i32 }
+%struct.Picture = type { i32, i32, [100 x %struct.Slice*], i32, float, float, float }
+%struct.Slice = type { i32, i32, i32, i32, i32, i32, %struct.datapartition*, %struct.MotionInfoContexts*, %struct.TextureInfoContexts*, i32, i32*, i32*, i32*, i32, i32*, i32*, i32*, i32 (i32)*, [3 x [2 x i32]] }
+%struct.datapartition = type { %struct.Bitstream*, %struct.EncodingEnvironment, %struct.EncodingEnvironment }
+%struct.Bitstream = type { i32, i32, i8, i32, i32, i8, i8, i32, i32, i8*, i32 }
+%struct.EncodingEnvironment = type { i32, i32, i32, i32, i32, i8*, i32*, i32, i32 }
+%struct.MotionInfoContexts = type { [3 x [11 x %struct.BiContextType]], [2 x [9 x %struct.BiContextType]], [2 x [10 x %struct.BiContextType]], [2 x [6 x %struct.BiContextType]], [4 x %struct.BiContextType], [4 x %struct.BiContextType], [3 x %struct.BiContextType] }
+%struct.BiContextType = type { i16, i8, i64 }
+%struct.TextureInfoContexts = type { [2 x %struct.BiContextType], [3 x [4 x %struct.BiContextType]], [10 x [4 x %struct.BiContextType]], [10 x [15 x %struct.BiContextType]], [10 x [15 x %struct.BiContextType]], [10 x [5 x %struct.BiContextType]], [10 x [5 x %struct.BiContextType]], [10 x [15 x %struct.BiContextType]], [10 x [15 x %struct.BiContextType]] }
+%struct.macroblock = type { i32, i32, i32, [2 x i32], i32, [8 x i32], %struct.macroblock*, i32, [2 x [4 x [4 x [2 x i32]]]], [16 x i8], [16 x i8], i32, i64, [4 x i32], [4 x i32], i64, i32, i32, i32, i32, i32, i32, i32, i32, i32, i32, i32, i32, i32, i32, i32, i32, i16, double, i32, i32, i32, i32, i32, i32, i32, i32, i32 }
+%struct.DecRefPicMarking = type { i32, i32, i32, i32, i32, %struct.DecRefPicMarking* }
+
+@img = external global %struct.ImageParameters*, align 8
+
+define void @intrapred_luma() {
+entry:
+ %PredPel = alloca [13 x i16], align 16
+ br label %for.body
+
+for.body: ; preds = %for.body, %entry
+ br i1 undef, label %for.body, label %for.body.262
+
+for.body.262: ; preds = %for.body
+ %0 = load %struct.ImageParameters*, %struct.ImageParameters** @img, align 8
+ br label %for.body.280
+
+for.body.280: ; preds = %for.body.280, %for.body.262
+ %indvars.iv66 = phi i64 [ 0, %for.body.262 ], [ %indvars.iv.next67, %for.body.280 ]
+ %arrayidx282 = getelementptr inbounds [13 x i16], [13 x i16]* %PredPel, i64 0, i64 1
+ %arrayidx283 = getelementptr inbounds i16, i16* %arrayidx282, i64 %indvars.iv66
+ %1 = load i16, i16* %arrayidx283, align 2
+ %arrayidx289 = getelementptr inbounds %struct.ImageParameters, %struct.ImageParameters* %0, i64 0, i32 47, i64 0, i64 2, i64 %indvars.iv66
+ store i16 %1, i16* %arrayidx289, align 2
+ %indvars.iv.next67 = add nuw nsw i64 %indvars.iv66, 1
+ br i1 false, label %for.body.280, label %for.end.298
+
+for.end.298: ; preds = %for.body.280
+ %2 = load %struct.ImageParameters*, %struct.ImageParameters** @img, align 8
+ br label %for.body.310
+
+for.body.310: ; preds = %for.body.310, %for.end.298
+ %indvars.iv = phi i64 [ 0, %for.end.298 ], [ %indvars.iv.next, %for.body.310 ]
+ %InterScopSext = sext i16 %1 to i64
+ %arrayidx312 = getelementptr inbounds [13 x i16], [13 x i16]* %PredPel, i64 0, i64 %InterScopSext
+ %arrayidx313 = getelementptr inbounds i16, i16* %arrayidx312, i64 %indvars.iv
+ %3 = load i16, i16* %arrayidx313, align 2
+ %arrayidx322 = getelementptr inbounds %struct.ImageParameters, %struct.ImageParameters* %2, i64 0, i32 47, i64 1, i64 %indvars.iv, i64 1
+ store i16 %3, i16* %arrayidx322, align 2
+ %indvars.iv.next = add nuw nsw i64 %indvars.iv, 1
+ br i1 false, label %for.body.310, label %for.end.328
+
+for.end.328: ; preds = %for.body.310
+ ret void
+}
OpenPOWER on IntegriCloud