diff options
-rw-r--r-- | polly/include/polly/CodeGen/BlockGenerators.h | 19 | ||||
-rw-r--r-- | polly/lib/CodeGen/BlockGenerators.cpp | 63 | ||||
-rw-r--r-- | polly/test/Isl/CodeGen/MemAccess/different_types.ll | 8 | ||||
-rw-r--r-- | polly/test/Isl/CodeGen/OpenMP/new_multidim_access.ll | 8 | ||||
-rw-r--r-- | polly/test/Isl/CodeGen/exprModDiv.ll | 12 | ||||
-rw-r--r-- | polly/test/Isl/CodeGen/non-affine-phi-node-expansion-3.ll | 1 | ||||
-rw-r--r-- | polly/test/Isl/CodeGen/scalar-store-from-same-bb.ll | 32 |
7 files changed, 80 insertions, 63 deletions
diff --git a/polly/include/polly/CodeGen/BlockGenerators.h b/polly/include/polly/CodeGen/BlockGenerators.h index 25187b0db3e..0b6885761a5 100644 --- a/polly/include/polly/CodeGen/BlockGenerators.h +++ b/polly/include/polly/CodeGen/BlockGenerators.h @@ -378,9 +378,13 @@ protected: /// /// @param Stmt The statement we generate code for. /// @param BB The basic block we generate code for. + /// @param LTS A mapping from loops virtual canonical induction + /// variable to their new values + /// (for values recalculated in the new ScoP, but not + /// within this basic block) /// @param BBMap A mapping from old values to their new values in this block. virtual void generateScalarStores(ScopStmt &Stmt, BasicBlock *BB, - ValueMapT &BBMAp); + LoopToScevMapT <S, ValueMapT &BBMap); /// @brief Handle users of @p Inst outside the SCoP. /// @@ -526,12 +530,17 @@ protected: /// /// @param ScalarValue The original value needed. /// @param R The current SCoP region. + /// @param Stmt The ScopStmt in which we look up this value. + /// @param LTS A mapping from loops virtual canonical induction + /// variable to their new values + /// (for values recalculated in the new ScoP, but not + /// within this basic block) /// @param BBMap A mapping from old values to their new values /// (for values recalculated within this basic block). /// /// @returns The newest version (e.g., reloaded) of the scalar value. - Value *getNewScalarValue(Value *ScalarValue, const Region &R, - ValueMapT &BBMap); + Value *getNewScalarValue(Value *ScalarValue, const Region &R, ScopStmt &, + LoopToScevMapT <S, ValueMapT &BBMap); }; /// @brief Generate a new vector basic block for a polyhedral statement. @@ -768,8 +777,12 @@ private: /// /// @param Stmt The statement we generate code for. /// @param BB The basic block we generate code for. + /// @param LTS A mapping from loops virtual canonical induction variable to + /// their new values (for values recalculated in the new ScoP, + /// but not within this basic block) /// @param BBMap A mapping from old values to their new values in this block. virtual void generateScalarStores(ScopStmt &Stmt, BasicBlock *BB, + LoopToScevMapT <S, ValueMapT &BBMAp) override; /// @brief Copy a single PHI instruction. diff --git a/polly/lib/CodeGen/BlockGenerators.cpp b/polly/lib/CodeGen/BlockGenerators.cpp index 652ad8b5dec..94163c3c4bc 100644 --- a/polly/lib/CodeGen/BlockGenerators.cpp +++ b/polly/lib/CodeGen/BlockGenerators.cpp @@ -285,8 +285,7 @@ void BlockGenerator::copyInstruction(ScopStmt &Stmt, const Instruction *Inst, Loop *L = getLoopForInst(Inst); if ((Stmt.isBlockStmt() || !Stmt.getRegion()->contains(L)) && canSynthesize(Inst, &LI, &SE, &Stmt.getParent()->getRegion())) { - Value *NewValue = getNewValue(Stmt, Inst, BBMap, LTS, L); - BBMap[Inst] = NewValue; + // Synthesizable statements will be generated on-demand. return; } @@ -316,28 +315,6 @@ void BlockGenerator::copyInstruction(ScopStmt &Stmt, const Instruction *Inst, copyInstScalar(Stmt, Inst, BBMap, LTS); } -/// @brief Remove trivially dead instructions from BB -/// -/// This function drops trivially dead instructions from a basic block. It -/// on purpose does _not_ recurse into other BBs even if the deletion of -/// instructions in this basic block can make instructions in other basic blocks -/// triviall dead. -static void simplifyInstsInBlockOnly(BasicBlock *BB) { - auto BI = --BB->end(), BE = BB->begin(); - bool Exit = false; - while (!Exit) { - auto ToRemove = BI; - if (BI != BE) - BI--; - else - Exit = true; - - if (!isInstructionTriviallyDead(ToRemove)) - continue; - ToRemove->eraseFromParent(); - } -} - void BlockGenerator::copyStmt(ScopStmt &Stmt, LoopToScevMapT <S, isl_id_to_ast_expr *NewAccesses) { assert(Stmt.isBlockStmt() && @@ -347,16 +324,6 @@ void BlockGenerator::copyStmt(ScopStmt &Stmt, LoopToScevMapT <S, BasicBlock *BB = Stmt.getBasicBlock(); copyBB(Stmt, BB, BBMap, LTS, NewAccesses); - - auto CopyBB = Builder.GetInsertBlock(); - // Delete trivially dead instructions in CopyBB, but not in any other BB. - // Only for copyBB we know that there will _never_ be any future uses of - // instructions that have no use after copyBB has finished. Other instructions - // in the AST that have been generated by IslNodeBuilder may look dead at - // the moment, but may possibly still be referenced by GlobalMaps. If we - // delete them now, later uses would break surprisingly. - simplifyInstsInBlockOnly(CopyBB); - Builder.SetInsertPoint(CopyBB->getTerminator()); } BasicBlock *BlockGenerator::splitBB(BasicBlock *BB) { @@ -386,7 +353,7 @@ void BlockGenerator::copyBB(ScopStmt &Stmt, BasicBlock *BB, BasicBlock *CopyBB, // After a basic block was copied store all scalars that escape this block // in their alloca. First the scalars that have dependences inside the SCoP, // then the ones that might escape the SCoP. - generateScalarStores(Stmt, BB, BBMap); + generateScalarStores(Stmt, BB, LTS, BBMap); const Region &R = Stmt.getParent()->getRegion(); for (Instruction &Inst : *BB) @@ -481,6 +448,7 @@ void BlockGenerator::generateScalarLoads(ScopStmt &Stmt, } Value *BlockGenerator::getNewScalarValue(Value *ScalarValue, const Region &R, + ScopStmt &Stmt, LoopToScevMapT <S, ValueMapT &BBMap) { // If the value we want to store is an instruction we might have demoted it // in order to make it accessible here. In such a case a reload is @@ -509,6 +477,16 @@ Value *BlockGenerator::getNewScalarValue(Value *ScalarValue, const Region &R, if (Value *ScalarValueCopy = BBMap.lookup(ScalarValueInst)) return /* Case (3a) */ ScalarValueCopy; + if ((Stmt.isBlockStmt() && + Stmt.getBasicBlock() == ScalarValueInst->getParent()) || + (Stmt.isRegionStmt() && Stmt.getRegion()->contains(ScalarValueInst))) { + auto SynthesizedValue = trySynthesizeNewValue( + Stmt, ScalarValueInst, BBMap, LTS, getLoopForInst(ScalarValueInst)); + + if (SynthesizedValue) + return SynthesizedValue; + } + // Case (3b) Value *Address = getOrCreateScalarAlloca(ScalarValueInst); ScalarValue = Builder.CreateLoad(Address, Address->getName() + ".reload"); @@ -517,6 +495,7 @@ Value *BlockGenerator::getNewScalarValue(Value *ScalarValue, const Region &R, } void BlockGenerator::generateScalarStores(ScopStmt &Stmt, BasicBlock *BB, + LoopToScevMapT <S, ValueMapT &BBMap) { const Region &R = Stmt.getParent()->getRegion(); @@ -531,7 +510,7 @@ void BlockGenerator::generateScalarStores(ScopStmt &Stmt, BasicBlock *BB, Value *Val = MA->getAccessValue(); auto *Address = getOrCreateAlloca(*MA); - Val = getNewScalarValue(Val, R, BBMap); + Val = getNewScalarValue(Val, R, Stmt, LTS, BBMap); Builder.CreateStore(Val, Address); } } @@ -1153,15 +1132,6 @@ void RegionGenerator::copyStmt(ScopStmt &Stmt, LoopToScevMapT <S, LTS[L] = SE.getUnknown(LoopPHI); } - // Delete trivially dead instructions in CopyBB, but not in any other BB. - // Only for copyBB we know that there will _never_ be any future uses of - // instructions that have no use after copyBB has finished. Other instructions - // in the AST that have been generated by IslNodeBuilder may look dead at - // the moment, but may possibly still be referenced by GlobalMaps. If we - // delete them now, later uses would break surprisingly. - for (auto *BB : SeenBlocks) - simplifyInstsInBlockOnly(BlockMap[BB]); - // Reset the old insert point for the build. Builder.SetInsertPoint(ExitBBCopy->begin()); } @@ -1181,6 +1151,7 @@ void RegionGenerator::generateScalarLoads(ScopStmt &Stmt, } void RegionGenerator::generateScalarStores(ScopStmt &Stmt, BasicBlock *BB, + LoopToScevMapT <S, ValueMapT &BBMap) { const Region &R = Stmt.getParent()->getRegion(); @@ -1203,7 +1174,7 @@ void RegionGenerator::generateScalarStores(ScopStmt &Stmt, BasicBlock *BB, auto Address = getOrCreateAlloca(*MA); - Val = getNewScalarValue(Val, R, BBMap); + Val = getNewScalarValue(Val, R, Stmt, LTS, BBMap); Builder.CreateStore(Val, Address); } } diff --git a/polly/test/Isl/CodeGen/MemAccess/different_types.ll b/polly/test/Isl/CodeGen/MemAccess/different_types.ll index ffd5bb48610..a45660784e6 100644 --- a/polly/test/Isl/CodeGen/MemAccess/different_types.ll +++ b/polly/test/Isl/CodeGen/MemAccess/different_types.ll @@ -9,10 +9,10 @@ ; A[i] += 10; ; } -; CHECK: %polly.access.cast.A1 = bitcast float* %A to i32* -; CHECK: %5 = sub nsw i64 99, %polly.indvar15 -; CHECK: %polly.access.A19 = getelementptr i32, i32* %polly.access.cast.A18, i64 %5 -; CHECK: %6 = bitcast i32* %polly.access.A19 to float* +; CHECK: %polly.access.cast.A14 = bitcast float* %A to i32* +; CHECK: %5 = sub nsw i64 99, %polly.indvar11 +; CHECK: %polly.access.A15 = getelementptr i32, i32* %polly.access.cast.A14, i64 %5 +; CHECK: %6 = bitcast i32* %polly.access.A15 to float* ; CHECK: %tmp14_p_scalar_ = load float, float* %6, align 4, !alias.scope !3, !noalias !4 target datalayout = "e-m:e-i64:64-f80:128-n8:16:32:64-S128" diff --git a/polly/test/Isl/CodeGen/OpenMP/new_multidim_access.ll b/polly/test/Isl/CodeGen/OpenMP/new_multidim_access.ll index f0b296daf75..622db9b99ac 100644 --- a/polly/test/Isl/CodeGen/OpenMP/new_multidim_access.ll +++ b/polly/test/Isl/CodeGen/OpenMP/new_multidim_access.ll @@ -25,11 +25,11 @@ ; IR: %polly.access.polly.subfunc.arg.A = getelementptr float, float* %polly.subfunc.arg.A, i64 %polly.access.add.polly.subfunc.arg.A ; IR: %tmp10_p_scalar_ = load float, float* %polly.access.polly.subfunc.arg.A, align 4, !alias.scope !0, !noalias !2, !llvm.mem.parallel_loop_access !3 -; IR: %polly.access.mul.polly.subfunc.arg.A9 = mul i64 %polly.indvar, %polly.subfunc.arg.m +; IR: %polly.access.mul.polly.subfunc.arg.A8 = mul i64 %polly.indvar, %polly.subfunc.arg.m ; IR: %7 = add nsw i64 %polly.indvar5, 43 -; IR: %polly.access.add.polly.subfunc.arg.A10 = add i64 %polly.access.mul.polly.subfunc.arg.A9, %7 -; IR: %polly.access.polly.subfunc.arg.A11 = getelementptr float, float* %polly.subfunc.arg.A, i64 %polly.access.add.polly.subfunc.arg.A10 -; IR: store float %p_tmp11, float* %polly.access.polly.subfunc.arg.A11, align 4, !alias.scope !0, !noalias !2, !llvm.mem.parallel_ +; IR: %polly.access.add.polly.subfunc.arg.A9 = add i64 %polly.access.mul.polly.subfunc.arg.A8, %7 +; IR: %polly.access.polly.subfunc.arg.A10 = getelementptr float, float* %polly.subfunc.arg.A, i64 %polly.access.add.polly.subfunc.arg.A9 +; IR: store float %p_tmp11, float* %polly.access.polly.subfunc.arg.A10, align 4, !alias.scope !0, !noalias !2, !llvm.mem.parallel_ target datalayout = "e-m:e-i64:64-f80:128-n8:16:32:64-S128" define void @new_multidim_access(i64 %n, i64 %m, float* %A) { diff --git a/polly/test/Isl/CodeGen/exprModDiv.ll b/polly/test/Isl/CodeGen/exprModDiv.ll index ce59ad8fdf5..5991fac2019 100644 --- a/polly/test/Isl/CodeGen/exprModDiv.ll +++ b/polly/test/Isl/CodeGen/exprModDiv.ll @@ -28,7 +28,7 @@ ; each value of i to indeed be mapped to a value. ; ; CHECK: %pexp.p_div_q = udiv i64 %polly.indvar, 127 -; CHECK: %polly.access.B8 = getelementptr float, float* %B, i64 %pexp.p_div_q +; CHECK: %polly.access.B7 = getelementptr float, float* %B, i64 %pexp.p_div_q ; #define floord(n,d) ((n < 0) ? (n - d + 1) : n) / d ; A[p + 127 * floord(-p - 1, 127) + 127] @@ -42,11 +42,11 @@ ; CHECK: %19 = mul nsw i64 127, %pexp.fdiv_q.4 ; CHECK: %20 = add nsw i64 %p, %19 ; CHECK: %21 = add nsw i64 %20, 127 -; CHECK: %polly.access.A10 = getelementptr float, float* %A, i64 %21 +; CHECK: %polly.access.A8 = getelementptr float, float* %A, i64 %21 ; A[p / 127] ; CHECK: %pexp.div = sdiv exact i64 %p, 127 -; CHECK: %polly.access.B13 = getelementptr float, float* %B, i64 %pexp.div +; CHECK: %polly.access.B9 = getelementptr float, float* %B, i64 %pexp.div ; A[i % 128] ; POW2: %pexp.pdiv_r = urem i64 %polly.indvar, 128 @@ -54,7 +54,7 @@ ; A[floor(i / 128)] ; POW2: %pexp.p_div_q = udiv i64 %polly.indvar, 128 -; POW2: %polly.access.B8 = getelementptr float, float* %B, i64 %pexp.p_div_q +; POW2: %polly.access.B7 = getelementptr float, float* %B, i64 %pexp.p_div_q ; #define floord(n,d) ((n < 0) ? (n - d + 1) : n) / d ; A[p + 128 * floord(-p - 1, 128) + 128] @@ -64,11 +64,11 @@ ; POW2: %19 = mul nsw i64 128, %polly.fdiv_q.shr ; POW2: %20 = add nsw i64 %p, %19 ; POW2: %21 = add nsw i64 %20, 128 -; POW2: %polly.access.A10 = getelementptr float, float* %A, i64 %21 +; POW2: %polly.access.A8 = getelementptr float, float* %A, i64 %21 ; A[p / 128] ; POW2: %pexp.div = sdiv exact i64 %p, 128 -; POW2: %polly.access.B13 = getelementptr float, float* %B, i64 %pexp.div +; POW2: %polly.access.B9 = getelementptr float, float* %B, i64 %pexp.div target datalayout = "e-m:e-i64:64-f80:128-n8:16:32:64-S128" diff --git a/polly/test/Isl/CodeGen/non-affine-phi-node-expansion-3.ll b/polly/test/Isl/CodeGen/non-affine-phi-node-expansion-3.ll index 61913cb09ef..59852f00932 100644 --- a/polly/test/Isl/CodeGen/non-affine-phi-node-expansion-3.ll +++ b/polly/test/Isl/CodeGen/non-affine-phi-node-expansion-3.ll @@ -13,6 +13,7 @@ loop: br i1 %cond0, label %branch1, label %backedge ; CHECK-LABEL: polly.stmt.loop: +; CHECK-NEXT: %polly.subregion.iv = phi i32 [ 0, %polly.stmt.loop.entry ] ; CHECK-NEXT: %p_val0 = fadd float 1.000000e+00, 2.000000e+00 ; CHECK-NEXT: %p_val1 = fadd float 1.000000e+00, 2.000000e+00 ; CHECK-NEXT: %p_val2 = fadd float 1.000000e+00, 2.000000e+00 diff --git a/polly/test/Isl/CodeGen/scalar-store-from-same-bb.ll b/polly/test/Isl/CodeGen/scalar-store-from-same-bb.ll new file mode 100644 index 00000000000..1653562d1e1 --- /dev/null +++ b/polly/test/Isl/CodeGen/scalar-store-from-same-bb.ll @@ -0,0 +1,32 @@ +; RUN: opt %loadPolly -polly-detect-unprofitable -polly-no-early-exit \ +; RUN: -polly-codegen -S < %s | FileCheck %s + +; This test ensures that the expression N + 1 that is stored in the phi-node +; alloca, is directly computed and not incorrectly transfered through memory. + +; CHECK: store i64 %2, i64* %res.phiops +; CHECK: %2 = add i64 %N, 1 + +define i64 @foo(float* %A, i64 %N) { +entry: + br label %next + +next: + %cond = icmp eq i64 %N, 0 + br i1 %cond, label %loop, label %merge + +loop: + %indvar = phi i64 [0, %next], [%indvar.next, %loop] + %indvar.next = add i64 %indvar, 1 + %sum = add i64 %N, 1 + store float 4.0, float* %A + %cmp = icmp sle i64 %indvar.next, 100 + br i1 %cmp, label %loop, label %merge + +merge: + %res = phi i64 [%sum, %loop], [0, %next] + br label %exit + +exit: + ret i64 %res +} |