diff options
-rw-r--r-- | polly/lib/CodeGen/IslNodeBuilder.cpp | 25 | ||||
-rw-r--r-- | polly/test/Isl/CodeGen/invariant_load_escaping.ll | 55 | ||||
-rw-r--r-- | polly/test/Isl/CodeGen/two-loops-right-after-each-other-2.ll | 59 |
3 files changed, 132 insertions, 7 deletions
diff --git a/polly/lib/CodeGen/IslNodeBuilder.cpp b/polly/lib/CodeGen/IslNodeBuilder.cpp index 83121eed0a0..0797f1e24ff 100644 --- a/polly/lib/CodeGen/IslNodeBuilder.cpp +++ b/polly/lib/CodeGen/IslNodeBuilder.cpp @@ -890,6 +890,7 @@ void IslNodeBuilder::preloadInvariantLoads() { return; const Region &R = S.getRegion(); + BasicBlock *EntryBB = &Builder.GetInsertBlock()->getParent()->getEntryBlock(); BasicBlock *PreLoadBB = SplitBlock(Builder.GetInsertBlock(), Builder.GetInsertPoint(), &DT, &LI); @@ -915,17 +916,27 @@ void IslNodeBuilder::preloadInvariantLoads() { isl_id_free(ParamId); } - SmallVector<Instruction *, 4> Users; + auto *SAI = S.getScopArrayInfo(MA->getBaseAddr()); + for (auto *DerivedSAI : SAI->getDerivedSAIs()) + DerivedSAI->setBasePtr(PreloadVal); + + // Use the escape system to get the correct value to users outside + // the SCoP. + BlockGenerator::EscapeUserVectorTy EscapeUsers; for (auto *U : AccInst->users()) if (Instruction *UI = dyn_cast<Instruction>(U)) if (!R.contains(UI)) - Users.push_back(UI); - for (auto *U : Users) - U->replaceUsesOfWith(AccInst, PreloadVal); + EscapeUsers.push_back(UI); - auto *SAI = S.getScopArrayInfo(MA->getBaseAddr()); - for (auto *DerivedSAI : SAI->getDerivedSAIs()) - DerivedSAI->setBasePtr(PreloadVal); + if (EscapeUsers.empty()) + continue; + + auto *Ty = AccInst->getType(); + auto *Alloca = new AllocaInst(Ty, AccInst->getName() + ".preload.s2a"); + Alloca->insertBefore(EntryBB->getFirstInsertionPt()); + Builder.CreateStore(PreloadVal, Alloca); + + EscapeMap[AccInst] = std::make_pair(Alloca, std::move(EscapeUsers)); } isl_ast_build_free(Build); diff --git a/polly/test/Isl/CodeGen/invariant_load_escaping.ll b/polly/test/Isl/CodeGen/invariant_load_escaping.ll new file mode 100644 index 00000000000..633dda8f890 --- /dev/null +++ b/polly/test/Isl/CodeGen/invariant_load_escaping.ll @@ -0,0 +1,55 @@ +; RUN: opt %loadPolly -polly-codegen -polly-detect-unprofitable -S < %s | FileCheck %s +; +; int f(int *A, int *B) { +; // Possible aliasing between A and B but if not then *B would be +; // invariant. We assume this and hoist *B but need to use a merged +; // version in the return. +; int i = 0; +; int x = 0; +; +; do { +; x = *B; +; A[i] += x; +; } while (i++ < 100); +; +; return x; +; } +; +; CHECK: polly.preload.begin: +; CHECK: %polly.access.B = getelementptr i32, i32* %B, i64 0 +; CHECK: %polly.access.B.load = load i32, i32* %polly.access.B +; CHECK: store i32 %polly.access.B.load, i32* %tmp.preload.s2a +; +; CHECK: polly.merge_new_and_old: +; CHECK: %tmp.merge = phi i32 [ %tmp.final_reload, %polly.loop_exit ], [ %tmp, %do.cond ] +; CHECK: br label %do.end +; +; CHECK: do.end: +; CHECK: ret i32 %tmp.merge +; +; CHECK: polly.loop_exit: +; CHECK: %tmp.final_reload = load i32, i32* %tmp.preload.s2a +; +target datalayout = "e-m:e-i64:64-f80:128-n8:16:32:64-S128" + +define i32 @f(i32* %A, i32* %B) { +entry: + br label %do.body + +do.body: ; preds = %do.cond, %entry + %indvars.iv = phi i64 [ %indvars.iv.next, %do.cond ], [ 0, %entry ] + %tmp = load i32, i32* %B, align 4 + %arrayidx = getelementptr inbounds i32, i32* %A, i64 %indvars.iv + %tmp1 = load i32, i32* %arrayidx, align 4 + %add = add nsw i32 %tmp1, %tmp + store i32 %add, i32* %arrayidx, align 4 + br label %do.cond + +do.cond: ; preds = %do.body + %indvars.iv.next = add nuw nsw i64 %indvars.iv, 1 + %exitcond = icmp ne i64 %indvars.iv.next, 101 + br i1 %exitcond, label %do.body, label %do.end + +do.end: ; preds = %do.cond + ret i32 %tmp +} diff --git a/polly/test/Isl/CodeGen/two-loops-right-after-each-other-2.ll b/polly/test/Isl/CodeGen/two-loops-right-after-each-other-2.ll new file mode 100644 index 00000000000..0f52b94a304 --- /dev/null +++ b/polly/test/Isl/CodeGen/two-loops-right-after-each-other-2.ll @@ -0,0 +1,59 @@ +; RUN: opt %loadPolly -polly-detect-unprofitable -polly-codegen -S < %s | FileCheck %s + +; CHECK: polly.merge_new_and_old: +; CHECK-NEXT: merge = phi + +%struct.ImageParameters = type { i32, i32, i32, i32, i32, i32, i32, i32, i32, i32, i32, i32, float, i32, i32, i32, i32, i32, i32, i32, i32, i32, i32, i32, i32, i32, i32, i32, i32, i32, i32, i8**, i8**, i32, i32***, i32, i32, i32, i32, i32, i32, i32, i32, i32, i32, i32, i32, [9 x [16 x [16 x i16]]], [5 x [16 x [16 x i16]]], [9 x [8 x [8 x i16]]], [2 x [4 x [16 x [16 x i16]]]], [16 x [16 x i16]], [16 x [16 x i32]], i32****, i32***, i32***, i32***, i32****, i32****, %struct.Picture*, %struct.Slice*, %struct.macroblock*, i32*, i32*, i32, i32, i32, i32, [4 x [4 x i32]], i32, i32, i32, i32, i32, double, i32, i32, i32, i32, i16******, i16******, i16******, i16******, [15 x i16], i32, i32, i32, i32, i32, i32, i32, i32, [6 x [32 x i32]], i32, i32, i32, i32, i32, i32, i32, i32, i32, i32, i32, i32, i32, [1 x i32], i32, i32, [2 x i32], i32, i32, i32, i32, i32, i32, i32, i32, i32, i32, i32, i32, i32, i32, i32, i32, %struct.DecRefPicMarking*, i32, i32, i32, i32, i32, i32, i32, i32, i32, i32, i32, i32, i32, i32, i32, i32, i32, i32, i32, i32, i32, i32, i32, double**, double***, i32***, double**, i32, i32, i32, i32, i32, i32, i32, i32, i32, i32, [3 x [2 x i32]], [2 x i32], i32, i32, i16, i32, i32, i32, i32, i32 } +%struct.Picture = type { i32, i32, [100 x %struct.Slice*], i32, float, float, float } +%struct.Slice = type { i32, i32, i32, i32, i32, i32, %struct.datapartition*, %struct.MotionInfoContexts*, %struct.TextureInfoContexts*, i32, i32*, i32*, i32*, i32, i32*, i32*, i32*, i32 (i32)*, [3 x [2 x i32]] } +%struct.datapartition = type { %struct.Bitstream*, %struct.EncodingEnvironment, %struct.EncodingEnvironment } +%struct.Bitstream = type { i32, i32, i8, i32, i32, i8, i8, i32, i32, i8*, i32 } +%struct.EncodingEnvironment = type { i32, i32, i32, i32, i32, i8*, i32*, i32, i32 } +%struct.MotionInfoContexts = type { [3 x [11 x %struct.BiContextType]], [2 x [9 x %struct.BiContextType]], [2 x [10 x %struct.BiContextType]], [2 x [6 x %struct.BiContextType]], [4 x %struct.BiContextType], [4 x %struct.BiContextType], [3 x %struct.BiContextType] } +%struct.BiContextType = type { i16, i8, i64 } +%struct.TextureInfoContexts = type { [2 x %struct.BiContextType], [3 x [4 x %struct.BiContextType]], [10 x [4 x %struct.BiContextType]], [10 x [15 x %struct.BiContextType]], [10 x [15 x %struct.BiContextType]], [10 x [5 x %struct.BiContextType]], [10 x [5 x %struct.BiContextType]], [10 x [15 x %struct.BiContextType]], [10 x [15 x %struct.BiContextType]] } +%struct.macroblock = type { i32, i32, i32, [2 x i32], i32, [8 x i32], %struct.macroblock*, i32, [2 x [4 x [4 x [2 x i32]]]], [16 x i8], [16 x i8], i32, i64, [4 x i32], [4 x i32], i64, i32, i32, i32, i32, i32, i32, i32, i32, i32, i32, i32, i32, i32, i32, i32, i32, i16, double, i32, i32, i32, i32, i32, i32, i32, i32, i32 } +%struct.DecRefPicMarking = type { i32, i32, i32, i32, i32, %struct.DecRefPicMarking* } + +@img = external global %struct.ImageParameters*, align 8 + +define void @intrapred_luma() { +entry: + %PredPel = alloca [13 x i16], align 16 + br label %for.body + +for.body: ; preds = %for.body, %entry + br i1 undef, label %for.body, label %for.body.262 + +for.body.262: ; preds = %for.body + %0 = load %struct.ImageParameters*, %struct.ImageParameters** @img, align 8 + br label %for.body.280 + +for.body.280: ; preds = %for.body.280, %for.body.262 + %indvars.iv66 = phi i64 [ 0, %for.body.262 ], [ %indvars.iv.next67, %for.body.280 ] + %arrayidx282 = getelementptr inbounds [13 x i16], [13 x i16]* %PredPel, i64 0, i64 1 + %arrayidx283 = getelementptr inbounds i16, i16* %arrayidx282, i64 %indvars.iv66 + %1 = load i16, i16* %arrayidx283, align 2 + %arrayidx289 = getelementptr inbounds %struct.ImageParameters, %struct.ImageParameters* %0, i64 0, i32 47, i64 0, i64 2, i64 %indvars.iv66 + store i16 %1, i16* %arrayidx289, align 2 + %indvars.iv.next67 = add nuw nsw i64 %indvars.iv66, 1 + br i1 false, label %for.body.280, label %for.end.298 + +for.end.298: ; preds = %for.body.280 + %2 = load %struct.ImageParameters*, %struct.ImageParameters** @img, align 8 + br label %for.body.310 + +for.body.310: ; preds = %for.body.310, %for.end.298 + %indvars.iv = phi i64 [ 0, %for.end.298 ], [ %indvars.iv.next, %for.body.310 ] + %InterScopSext = sext i16 %1 to i64 + %arrayidx312 = getelementptr inbounds [13 x i16], [13 x i16]* %PredPel, i64 0, i64 %InterScopSext + %arrayidx313 = getelementptr inbounds i16, i16* %arrayidx312, i64 %indvars.iv + %3 = load i16, i16* %arrayidx313, align 2 + %arrayidx322 = getelementptr inbounds %struct.ImageParameters, %struct.ImageParameters* %2, i64 0, i32 47, i64 1, i64 %indvars.iv, i64 1 + store i16 %3, i16* %arrayidx322, align 2 + %indvars.iv.next = add nuw nsw i64 %indvars.iv, 1 + br i1 false, label %for.body.310, label %for.end.328 + +for.end.328: ; preds = %for.body.310 + ret void +} |