diff options
-rw-r--r-- | polly/include/polly/CodeGen/IRBuilder.h | 13 | ||||
-rw-r--r-- | polly/include/polly/CodeGen/Utils.h | 22 | ||||
-rwxr-xr-x | polly/include/polly/Support/ScopHelper.h | 9 | ||||
-rw-r--r-- | polly/lib/CodeGen/CodeGeneration.cpp | 3 | ||||
-rw-r--r-- | polly/lib/CodeGen/IslCodeGeneration.cpp | 60 | ||||
-rw-r--r-- | polly/lib/CodeGen/Utils.cpp | 15 | ||||
-rw-r--r-- | polly/lib/Support/ScopHelper.cpp | 16 | ||||
-rw-r--r-- | polly/test/Isl/CodeGen/blas_sscal_simplified.ll | 44 | ||||
-rw-r--r-- | polly/test/Isl/CodeGen/multidim_2d_parametric_array_static_loop_bounds.ll | 7 | ||||
-rw-r--r-- | polly/test/Isl/CodeGen/run-time-condition-with-scev-parameters.ll | 2 | ||||
-rw-r--r-- | polly/test/Isl/CodeGen/scop_never_executed_runtime_check_location.ll | 35 |
11 files changed, 167 insertions, 59 deletions
diff --git a/polly/include/polly/CodeGen/IRBuilder.h b/polly/include/polly/CodeGen/IRBuilder.h index a2b7e31f847..e4ab1466d88 100644 --- a/polly/include/polly/CodeGen/IRBuilder.h +++ b/polly/include/polly/CodeGen/IRBuilder.h @@ -95,11 +95,16 @@ private: // // We currently always name instructions, as the polly test suite currently // matches for certain names. -// -// typedef PollyBuilderInserter<false> IRInserter; -// typedef llvm::IRBuilder<false, llvm::ConstantFolder, IRInserter> -// PollyIRBuilder; typedef PollyBuilderInserter<true> IRInserter; typedef llvm::IRBuilder<true, llvm::ConstantFolder, IRInserter> PollyIRBuilder; + +/// @brief Return an IR builder pointed before the @p BB terminator. +static inline PollyIRBuilder createPollyIRBuilder(llvm::BasicBlock *BB, + LoopAnnotator &LA) { + PollyIRBuilder Builder(BB->getContext(), llvm::ConstantFolder(), + polly::IRInserter(LA)); + Builder.SetInsertPoint(BB->getTerminator()); + return Builder; +} } #endif diff --git a/polly/include/polly/CodeGen/Utils.h b/polly/include/polly/CodeGen/Utils.h index 341ba05aea2..0752aa368de 100644 --- a/polly/include/polly/CodeGen/Utils.h +++ b/polly/include/polly/CodeGen/Utils.h @@ -15,6 +15,7 @@ namespace llvm { class Pass; +class Value; class BasicBlock; } @@ -22,16 +23,12 @@ namespace polly { class Scop; -/// @brief Execute a Scop conditionally. +/// @brief Execute a Scop conditionally wrt @p RTC. /// /// In the CFG the optimized code of the Scop is generated next to the /// original code. Both the new and the original version of the code remain -/// in the CFG. A branch statement decides which version is executed. -/// For now, we always execute the new version (the old one is dead code -/// eliminated by the cleanup passes). In the future we may decide to execute -/// the new version only if certain run time checks succeed. This will be -/// useful to support constructs for which we cannot prove all assumptions at -/// compile time. +/// in the CFG. 
A branch statement decides which version is executed based on +/// the runtime value of @p RTC. /// /// Before transformation: /// @@ -53,9 +50,12 @@ class Scop; /// \ / /// bb1 (joinBlock) /// -/// @param S The Scop to execute conditionally. -/// @param PassInfo A reference to the pass calling this function. -/// @return BasicBlock The 'StartBlock' to which new code can be added. -llvm::BasicBlock *executeScopConditionally(Scop &S, llvm::Pass *PassInfo); +/// @param S The Scop to execute conditionally. +/// @param P A reference to the pass calling this function. +/// @param RTC The runtime condition checked before executing the new SCoP. +/// +/// @return The 'StartBlock' to which new code can be added. +llvm::BasicBlock *executeScopConditionally(Scop &S, llvm::Pass *P, + llvm::Value *RTC); } #endif diff --git a/polly/include/polly/Support/ScopHelper.h b/polly/include/polly/Support/ScopHelper.h index 4f275b2f74b..ca30c85aa05 100755 --- a/polly/include/polly/Support/ScopHelper.h +++ b/polly/include/polly/Support/ScopHelper.h @@ -52,13 +52,14 @@ bool hasInvokeEdge(const llvm::PHINode *PN); llvm::Value *getPointerOperand(llvm::Instruction &Inst); llvm::BasicBlock *createSingleExitEdge(llvm::Region *R, llvm::Pass *P); -/// @brief Simplify the region in a scop to have a single entry edge -/// and a single exit edge. +/// @brief Simplify the region in a SCoP to have a single unconditional entry +/// edge and a single exit edge. /// -/// @param S The scop that is simplified. +/// @param S The SCoP that is simplified. /// @param P The pass that is currently running. /// -void simplifyRegion(polly::Scop *S, llvm::Pass *P); +/// @return The unique entering block for the region. +llvm::BasicBlock *simplifyRegion(polly::Scop *S, llvm::Pass *P); /// @brief Split the entry block of a function to store the newly inserted /// allocations outside of all Scops. 
diff --git a/polly/lib/CodeGen/CodeGeneration.cpp b/polly/lib/CodeGen/CodeGeneration.cpp index c9f7c3f2334..4f3362c2056 100644 --- a/polly/lib/CodeGen/CodeGeneration.cpp +++ b/polly/lib/CodeGen/CodeGeneration.cpp @@ -1045,7 +1045,8 @@ public: simplifyRegion(&S, this); - BasicBlock *StartBlock = executeScopConditionally(S, this); + Value *RTC = ConstantInt::getTrue(S.getSE()->getContext()); + BasicBlock *StartBlock = executeScopConditionally(S, this, RTC); PollyIRBuilder Builder(StartBlock->begin()); diff --git a/polly/lib/CodeGen/IslCodeGeneration.cpp b/polly/lib/CodeGen/IslCodeGeneration.cpp index f2ff3b10644..84b91ed65d5 100644 --- a/polly/lib/CodeGen/IslCodeGeneration.cpp +++ b/polly/lib/CodeGen/IslCodeGeneration.cpp @@ -566,41 +566,51 @@ public: IslCodeGeneration() : ScopPass(ID) {} + /// @name The analysis passes we need to generate code. + /// + ///{ + LoopInfo *LI; + IslAstInfo *AI; + DominatorTree *DT; + ScalarEvolution *SE; + ///} + + /// @brief The loop annotator to generate llvm.loop metadata. + LoopAnnotator Annotator; + + /// @brief Build the runtime condition. + /// + /// Build the condition that evaluates at run-time to true iff all + /// assumptions taken for the SCoP hold, and to false otherwise. + /// + /// @return A value evaluating to true/false if execution is safe/unsafe. 
+ Value *buildRTC(PollyIRBuilder &Builder, IslExprBuilder &ExprBuilder) { + Builder.SetInsertPoint(Builder.GetInsertBlock()->getTerminator()); + Value *RTC = ExprBuilder.create(AI->getRunCondition()); + return Builder.CreateIsNotNull(RTC); + } + bool runOnScop(Scop &S) { - LoopInfo &LI = getAnalysis<LoopInfo>(); - IslAstInfo &AstInfo = getAnalysis<IslAstInfo>(); - ScalarEvolution &SE = getAnalysis<ScalarEvolution>(); - DominatorTree &DT = getAnalysis<DominatorTreeWrapperPass>().getDomTree(); + LI = &getAnalysis<LoopInfo>(); + AI = &getAnalysis<IslAstInfo>(); + DT = &getAnalysis<DominatorTreeWrapperPass>().getDomTree(); + SE = &getAnalysis<ScalarEvolution>(); assert(!S.getRegion().isTopLevelRegion() && "Top level regions are not supported"); - simplifyRegion(&S, this); - - BasicBlock *StartBlock = executeScopConditionally(S, this); - isl_ast_node *Ast = AstInfo.getAst(); - LoopAnnotator Annotator; - PollyIRBuilder Builder(StartBlock->getContext(), llvm::ConstantFolder(), - polly::IRInserter(Annotator)); - Builder.SetInsertPoint(StartBlock->begin()); + BasicBlock *EnteringBB = simplifyRegion(&S, this); + PollyIRBuilder Builder = createPollyIRBuilder(EnteringBB, Annotator); - IslNodeBuilder NodeBuilder(Builder, Annotator, this, LI, SE, DT); - - Builder.SetInsertPoint(StartBlock->getSinglePredecessor()->begin()); + IslNodeBuilder NodeBuilder(Builder, Annotator, this, *LI, *SE, *DT); NodeBuilder.addMemoryAccesses(S); NodeBuilder.addParameters(S.getContext()); - // Build condition that evaluates at run-time if all assumptions taken - // for the scop hold. If we detect some assumptions do not hold, the - // original code is executed. 
- Value *V = NodeBuilder.getExprBuilder().create(AstInfo.getRunCondition()); - Value *Zero = ConstantInt::get(V->getType(), 0); - V = Builder.CreateICmp(CmpInst::ICMP_NE, Zero, V); - BasicBlock *PrevBB = StartBlock->getUniquePredecessor(); - BranchInst *Branch = dyn_cast<BranchInst>(PrevBB->getTerminator()); - Branch->setCondition(V); + + Value *RTC = buildRTC(Builder, NodeBuilder.getExprBuilder()); + BasicBlock *StartBlock = executeScopConditionally(S, this, RTC); Builder.SetInsertPoint(StartBlock->begin()); - NodeBuilder.create(Ast); + NodeBuilder.create(AI->getAst()); return true; } diff --git a/polly/lib/CodeGen/Utils.cpp b/polly/lib/CodeGen/Utils.cpp index 36722d3652e..8ea9a4aa32c 100644 --- a/polly/lib/CodeGen/Utils.cpp +++ b/polly/lib/CodeGen/Utils.cpp @@ -20,18 +20,17 @@ using namespace llvm; -BasicBlock *polly::executeScopConditionally(Scop &S, Pass *PassInfo) { +BasicBlock *polly::executeScopConditionally(Scop &S, Pass *P, Value *RTC) { BasicBlock *StartBlock, *SplitBlock, *NewBlock; Region &R = S.getRegion(); PollyIRBuilder Builder(R.getEntry()); - DominatorTree &DT = - PassInfo->getAnalysis<DominatorTreeWrapperPass>().getDomTree(); - RegionInfo &RI = PassInfo->getAnalysis<RegionInfoPass>().getRegionInfo(); - LoopInfo &LI = PassInfo->getAnalysis<LoopInfo>(); + DominatorTree &DT = P->getAnalysis<DominatorTreeWrapperPass>().getDomTree(); + RegionInfo &RI = P->getAnalysis<RegionInfoPass>().getRegionInfo(); + LoopInfo &LI = P->getAnalysis<LoopInfo>(); // Split the entry edge of the region and generate a new basic block on this // edge. This function also updates ScopInfo and RegionInfo. 
- NewBlock = SplitEdge(R.getEnteringBlock(), R.getEntry(), PassInfo); + NewBlock = SplitEdge(R.getEnteringBlock(), R.getEntry(), P); if (DT.dominates(R.getEntry(), NewBlock)) { BasicBlock *OldBlock = R.getEntry(); std::string OldName = OldBlock->getName(); @@ -59,7 +58,7 @@ BasicBlock *polly::executeScopConditionally(Scop &S, Pass *PassInfo) { StartBlock = BasicBlock::Create(F->getContext(), "polly.start", F); SplitBlock->getTerminator()->eraseFromParent(); Builder.SetInsertPoint(SplitBlock); - Builder.CreateCondBr(Builder.getTrue(), StartBlock, R.getEntry()); + Builder.CreateCondBr(RTC, StartBlock, R.getEntry()); if (Loop *L = LI.getLoopFor(SplitBlock)) L->addBasicBlockToLoop(StartBlock, LI.getBase()); DT.addNewBlock(StartBlock, SplitBlock); @@ -72,7 +71,7 @@ BasicBlock *polly::executeScopConditionally(Scop &S, Pass *PassInfo) { // PHI nodes that would complicate life. MergeBlock = R.getExit(); else { - MergeBlock = SplitEdge(R.getExitingBlock(), R.getExit(), PassInfo); + MergeBlock = SplitEdge(R.getExitingBlock(), R.getExit(), P); // SplitEdge will never split R.getExit(), as R.getExit() has more than // one predecessor. Hence, mergeBlock is always a newly generated block. R.replaceExitRecursive(MergeBlock); diff --git a/polly/lib/Support/ScopHelper.cpp b/polly/lib/Support/ScopHelper.cpp index 15adf2d8381..01c1dd0048a 100644 --- a/polly/lib/Support/ScopHelper.cpp +++ b/polly/lib/Support/ScopHelper.cpp @@ -86,11 +86,14 @@ BasicBlock *polly::createSingleExitEdge(Region *R, Pass *P) { return SplitBlockPredecessors(BB, Preds, ".region", P); } -void polly::simplifyRegion(Scop *S, Pass *P) { +BasicBlock *polly::simplifyRegion(Scop *S, Pass *P) { Region *R = &S->getRegion(); + // The entering block for the region. + BasicBlock *EnteringBB = R->getEnteringBlock(); + // Create single entry edge if the region has multiple entry edges. 
- if (!R->getEnteringBlock()) { + if (!EnteringBB) { BasicBlock *OldEntry = R->getEntry(); BasicBlock *NewEntry = SplitBlock(OldEntry, OldEntry->begin(), P); @@ -101,6 +104,13 @@ void polly::simplifyRegion(Scop *S, Pass *P) { } R->replaceEntryRecursive(NewEntry); + EnteringBB = OldEntry; + } + + // Create an unconditional entry edge. + if (EnteringBB->getTerminator()->getNumSuccessors() != 1) { + EnteringBB = SplitEdge(EnteringBB, R->getEntry(), P); + EnteringBB->setName("polly.entering.block"); } // Create single exit edge if the region has multiple exit edges. @@ -110,6 +120,8 @@ void polly::simplifyRegion(Scop *S, Pass *P) { for (auto &&SubRegion : *R) SubRegion->replaceExitRecursive(NewExit); } + + return EnteringBB; } void polly::splitEntryBlockForAlloca(BasicBlock *EntryBlock, Pass *P) { diff --git a/polly/test/Isl/CodeGen/blas_sscal_simplified.ll b/polly/test/Isl/CodeGen/blas_sscal_simplified.ll new file mode 100644 index 00000000000..af847b69bdd --- /dev/null +++ b/polly/test/Isl/CodeGen/blas_sscal_simplified.ll @@ -0,0 +1,44 @@ +; RUN: opt %loadPolly -polly-codegen-isl < %s +; +; Regression test for a bug in the runtime check generation. + +; This was extracted from the blas testcase. It crashed in one +; part of the runtime check generation at some point. To be +; precise, we couldn't find a suitable block to put the RTC code in. 
+; +; int sscal(int n, float sa, float *sx) { +; for(int i=0; i<n; i++, sx++) +; *sx *= sa; +; return 0; +; } +; +target datalayout = "e-m:e-i64:64-f80:128-n8:16:32:64-S128" + +define i32 @sscal(i32 %n, float %sa, float* %sx) { +entry: + br label %entry.split + +entry.split: ; preds = %entry + %cmp1 = icmp sgt i32 %n, 0 + br i1 %cmp1, label %for.body.lr.ph, label %for.end + +for.body.lr.ph: ; preds = %entry.split + %0 = zext i32 %n to i64 + br label %for.body + +for.body: ; preds = %for.body.lr.ph, %for.body + %indvar = phi i64 [ 0, %for.body.lr.ph ], [ %indvar.next, %for.body ] + %sx.addr.02 = getelementptr float* %sx, i64 %indvar + %tmp = load float* %sx.addr.02, align 4 + %mul = fmul float %tmp, %sa + store float %mul, float* %sx.addr.02, align 4 + %indvar.next = add i64 %indvar, 1 + %exitcond = icmp ne i64 %indvar.next, %0 + br i1 %exitcond, label %for.body, label %for.cond.for.end_crit_edge + +for.cond.for.end_crit_edge: ; preds = %for.body + br label %for.end + +for.end: ; preds = %for.cond.for.end_crit_edge, %entry.split + ret i32 0 +} diff --git a/polly/test/Isl/CodeGen/multidim_2d_parametric_array_static_loop_bounds.ll b/polly/test/Isl/CodeGen/multidim_2d_parametric_array_static_loop_bounds.ll index 3bc8051cad6..a49151c0dc9 100644 --- a/polly/test/Isl/CodeGen/multidim_2d_parametric_array_static_loop_bounds.ll +++ b/polly/test/Isl/CodeGen/multidim_2d_parametric_array_static_loop_bounds.ll @@ -9,11 +9,12 @@ target triple = "x86_64-unknown-linux-gnu" ; for (long j = 0; j < 150; j++) ; A[i][j] = 1.0; ; } - -; CHECK: polly.split_new_and_old: +; +; CHECK: entry: ; CHECK: %0 = icmp sge i64 %m, 150 ; CHECK: %1 = select i1 %0, i64 1, i64 0 -; CHECK: %2 = icmp ne i64 0, %1 +; CHECK: %2 = icmp ne i64 %1, 0 +; CHECK: polly.split_new_and_old: ; CHECK: br i1 %2, label %polly.start, label %for.i define void @foo(i64 %n, i64 %m, double* %A) { diff --git a/polly/test/Isl/CodeGen/run-time-condition-with-scev-parameters.ll 
b/polly/test/Isl/CodeGen/run-time-condition-with-scev-parameters.ll index 017f047c463..1d3277eb8a1 100644 --- a/polly/test/Isl/CodeGen/run-time-condition-with-scev-parameters.ll +++ b/polly/test/Isl/CodeGen/run-time-condition-with-scev-parameters.ll @@ -4,7 +4,7 @@ ; CHECK: %1 = zext i32 %n to i64 ; CHECK: %2 = icmp sge i64 %1, 1 ; CHECK: %3 = select i1 %2, i64 1, i64 0 -; CHECK: %4 = icmp ne i64 0, %3 +; CHECK: %4 = icmp ne i64 %3, 0 target datalayout = "e-m:e-i64:64-f80:128-n8:16:32:64-S128" target triple = "x86_64-unknown-linux-gnu" diff --git a/polly/test/Isl/CodeGen/scop_never_executed_runtime_check_location.ll b/polly/test/Isl/CodeGen/scop_never_executed_runtime_check_location.ll new file mode 100644 index 00000000000..2780fb9cab2 --- /dev/null +++ b/polly/test/Isl/CodeGen/scop_never_executed_runtime_check_location.ll @@ -0,0 +1,35 @@ +; RUN: opt %loadPolly -polly-codegen-isl -S -polly-delinearize < %s | FileCheck %s +; RUN: opt %loadPolly -polly-codegen-isl -S -polly-delinearize -polly-codegen-scev < %s | FileCheck %s + +; Verify that we generate the runtime check code after the conditional branch +; in the SCoP region entering block (here %entry). 
+; +; CHECK: entry: +; CHECK: zext i32 %n to i64 +; CHECK: br i1 false +; +; CHECK: %[[T0:[._a-zA-Z0-9]]] = zext i32 %n to i64 +; CHECK: %[[T1:[._a-zA-Z0-9]]] = icmp sge i64 %[[T0]], 1 +; CHECK: %[[T2:[._a-zA-Z0-9]]] = select i1 %[[T1]], i64 1, i64 0 +; CHECK: %[[T3:[._a-zA-Z0-9]]] = icmp ne i64 %[[T2]], 0 + +target datalayout = "e-m:e-i64:64-f80:128-n8:16:32:64-S128" +target triple = "x86_64-unknown-linux-gnu" + +define void @init_array(i32 %n, double* %data) { +entry: + %0 = zext i32 %n to i64 + br i1 false, label %for.end10, label %for.body4 + +for.body4: ; preds = %for.body4, %entry + %indvar1 = phi i64 [ %indvar.next2, %for.body4 ], [ 0, %entry ] + %.moved.to.for.body4 = mul i64 %0, %indvar1 + %1 = add i64 %.moved.to.for.body4, 0 + %arrayidx7 = getelementptr double* %data, i64 %1 + store double undef, double* %arrayidx7, align 8 + %indvar.next2 = add i64 %indvar1, 1 + br i1 false, label %for.body4, label %for.end10 + +for.end10: ; preds = %for.body4 + ret void +} |