From 225f0d1ee2d333919e8df066460814115a8d82ec Mon Sep 17 00:00:00 2001 From: Michael Kruse Date: Sat, 17 Oct 2015 21:36:00 +0000 Subject: Load/Store scalar accesses before/after the statement itself Instead of generating implicit loads within basic blocks, put them before the instructions of the statment itself, including non-affine subregions. The region's entry node is dominating all blocks in the region and therefore the loaded value will be available there. Implicit writes in block-stmts were already stored back at the end of the block. Now, also generate the stores of non-affine subregions when leaving the statement, i.e. in the exiting block. This change is required for array-mapped implicits ("De-LICM") to ensure that there are no dependencies of demoted scalars within statments. Statement load all required values, operator on copied in registers, and then write back the changed value to the demoted memory. Lifetimes analysis within statements becomes unecessary. Differential Revision: http://reviews.llvm.org/D13487 llvm-svn: 250625 --- polly/include/polly/CodeGen/BlockGenerators.h | 23 ++---- polly/include/polly/ScopInfo.h | 6 +- polly/lib/Analysis/ScopInfo.cpp | 6 +- polly/lib/CodeGen/BlockGenerators.cpp | 92 +++++++++++----------- .../Isl/CodeGen/non-affine-phi-node-expansion-2.ll | 2 +- .../Isl/CodeGen/non-affine-phi-node-expansion-3.ll | 2 +- .../Isl/CodeGen/non-affine-phi-node-expansion-4.ll | 2 +- polly/test/Isl/CodeGen/phi_loop_carried_float.ll | 2 +- .../Isl/CodeGen/phi_loop_carried_float_escape.ll | 2 +- polly/test/Isl/CodeGen/read-only-scalars.ll | 2 +- 10 files changed, 63 insertions(+), 76 deletions(-) (limited to 'polly') diff --git a/polly/include/polly/CodeGen/BlockGenerators.h b/polly/include/polly/CodeGen/BlockGenerators.h index 8e1330bd205..cbda50c9f28 100644 --- a/polly/include/polly/CodeGen/BlockGenerators.h +++ b/polly/include/polly/CodeGen/BlockGenerators.h @@ -341,13 +341,11 @@ protected: Value *getOrCreateAlloca(Value *ScalarBase, ScalarAllocaMapTy &Map, const char *NameExt); - /// @brief Generate reload of scalars demoted to memory and needed by @p Inst. + /// @brief Generate reload of scalars demoted to memory and needed by @p Stmt. /// /// @param Stmt The statement we generate code for. - /// @param Inst The instruction that might need reloaded values. /// @param BBMap A mapping from old values to their new values in this block. - virtual void generateScalarLoads(ScopStmt &Stmt, const Instruction *Inst, - ValueMapT &BBMap); + void generateScalarLoads(ScopStmt &Stmt, ValueMapT &BBMap); /// @brief Generate the scalar stores for the given statement. /// @@ -356,14 +354,13 @@ protected: /// be demoted to memory. /// /// @param Stmt The statement we generate code for. - /// @param BB The basic block we generate code for. /// @param LTS A mapping from loops virtual canonical induction /// variable to their new values /// (for values recalculated in the new ScoP, but not /// within this basic block) /// @param BBMap A mapping from old values to their new values in this block. - virtual void generateScalarStores(ScopStmt &Stmt, BasicBlock *BB, - LoopToScevMapT <S, ValueMapT &BBMap); + virtual void generateScalarStores(ScopStmt &Stmt, LoopToScevMapT <S, + ValueMapT &BBMap); /// @brief Handle users of @p Inst outside the SCoP. /// @@ -746,14 +743,6 @@ private: void addOperandToPHI(ScopStmt &Stmt, const PHINode *PHI, PHINode *PHICopy, BasicBlock *IncomingBB, LoopToScevMapT <S); - /// @brief Generate reload of scalars demoted to memory and needed by @p Inst. - /// - /// @param Stmt The statement we generate code for. - /// @param Inst The instruction that might need reloaded values. - /// @param BBMap A mapping from old values to their new values in this block. - virtual void generateScalarLoads(ScopStmt &Stmt, const Instruction *Inst, - ValueMapT &BBMap) override; - /// @brief Generate the scalar stores for the given statement. /// /// After the statement @p Stmt was copied all inner-SCoP scalar dependences @@ -761,13 +750,11 @@ private: /// be demoted to memory. /// /// @param Stmt The statement we generate code for. - /// @param BB The basic block we generate code for. /// @param LTS A mapping from loops virtual canonical induction variable to /// their new values (for values recalculated in the new ScoP, /// but not within this basic block) /// @param BBMap A mapping from old values to their new values in this block. - virtual void generateScalarStores(ScopStmt &Stmt, BasicBlock *BB, - LoopToScevMapT <S, + virtual void generateScalarStores(ScopStmt &Stmt, LoopToScevMapT <S, ValueMapT &BBMAp) override; /// @brief Copy a single PHI instruction. diff --git a/polly/include/polly/ScopInfo.h b/polly/include/polly/ScopInfo.h index 7dbb967db72..47e1288759f 100644 --- a/polly/include/polly/ScopInfo.h +++ b/polly/include/polly/ScopInfo.h @@ -451,9 +451,6 @@ private: bool isAffine() const { return IsAffine; } - /// @brief Is this MemoryAccess modeling special PHI node accesses? - bool isPHI() const { return Origin == PHI; } - __isl_give isl_basic_map *createBasicAccessMap(ScopStmt *Statement); void assumeNoOutOfBound(); @@ -631,6 +628,9 @@ public: /// nodes. bool isImplicit() const { return !isExplicit(); } + /// @brief Is this MemoryAccess modeling special PHI node accesses? + bool isPHI() const { return Origin == PHI; } + /// @brief Get the statement that contains this memory access. ScopStmt *getStatement() const { return Statement; } diff --git a/polly/lib/Analysis/ScopInfo.cpp b/polly/lib/Analysis/ScopInfo.cpp index be1490a9cde..55fe4f2267b 100644 --- a/polly/lib/Analysis/ScopInfo.cpp +++ b/polly/lib/Analysis/ScopInfo.cpp @@ -3289,7 +3289,11 @@ bool ScopInfo::buildScalarDependences(Instruction *Inst, Region *R, continue; // Check whether or not the use is in the SCoP. - if (!R->contains(UseParent)) { + // If there is single exiting block, the single incoming value exit for node + // PHIs are handled like any escaping SCALAR. Otherwise, as if the PHI + // belongs to the the scop region. + bool IsExitNodePHI = isa(UI) && UI->getParent() == R->getExit(); + if (!R->contains(UseParent) && (R->getExitingBlock() || !IsExitNodePHI)) { AnyCrossStmtUse = true; continue; } diff --git a/polly/lib/CodeGen/BlockGenerators.cpp b/polly/lib/CodeGen/BlockGenerators.cpp index 4596ce7da1b..103f9fced48 100644 --- a/polly/lib/CodeGen/BlockGenerators.cpp +++ b/polly/lib/CodeGen/BlockGenerators.cpp @@ -231,10 +231,6 @@ void BlockGenerator::generateScalarStore(ScopStmt &Stmt, StoreInst *Store, void BlockGenerator::copyInstruction(ScopStmt &Stmt, Instruction *Inst, ValueMapT &BBMap, LoopToScevMapT <S, isl_id_to_ast_expr *NewAccesses) { - - // First check for possible scalar dependences for this instruction. - generateScalarLoads(Stmt, Inst, BBMap); - // Terminator instructions control the control flow. They are explicitly // expressed in the clast and do not need to be copied. if (Inst->isTerminator()) @@ -295,23 +291,24 @@ BasicBlock *BlockGenerator::copyBB(ScopStmt &Stmt, BasicBlock *BB, ValueMapT &BBMap, LoopToScevMapT <S, isl_id_to_ast_expr *NewAccesses) { BasicBlock *CopyBB = splitBB(BB); + Builder.SetInsertPoint(CopyBB->begin()); + generateScalarLoads(Stmt, BBMap); + copyBB(Stmt, BB, CopyBB, BBMap, LTS, NewAccesses); + + // After a basic block was copied store all scalars that escape this block in + // their alloca. + generateScalarStores(Stmt, LTS, BBMap); return CopyBB; } void BlockGenerator::copyBB(ScopStmt &Stmt, BasicBlock *BB, BasicBlock *CopyBB, ValueMapT &BBMap, LoopToScevMapT <S, isl_id_to_ast_expr *NewAccesses) { - Builder.SetInsertPoint(CopyBB->begin()); EntryBB = &CopyBB->getParent()->getEntryBlock(); for (Instruction &Inst : *BB) copyInstruction(Stmt, &Inst, BBMap, LTS, NewAccesses); - - // After a basic block was copied store all scalars that escape this block - // in their alloca. First the scalars that have dependences inside the SCoP, - // then the ones that might escape the SCoP. - generateScalarStores(Stmt, BB, LTS, BBMap); } Value *BlockGenerator::getOrCreateAlloca(Value *ScalarBase, @@ -382,16 +379,9 @@ void BlockGenerator::handleOutsideUsers(const Region &R, Instruction *Inst, EscapeMap[Inst] = std::make_pair(ScalarAddr, std::move(EscapeUsers)); } -void BlockGenerator::generateScalarLoads(ScopStmt &Stmt, - const Instruction *Inst, - ValueMapT &BBMap) { - auto *MAL = Stmt.lookupAccessesFor(Inst); - - if (!MAL) - return; - - for (MemoryAccess *MA : *MAL) { - if (MA->isExplicit() || !MA->isRead()) +void BlockGenerator::generateScalarLoads(ScopStmt &Stmt, ValueMapT &BBMap) { + for (MemoryAccess *MA : Stmt) { + if (MA->isExplicit() || MA->isWrite()) continue; auto *Address = getOrCreateAlloca(*MA); @@ -447,14 +437,13 @@ Value *BlockGenerator::getNewScalarValue(Value *ScalarValue, const Region &R, return ScalarValue; } -void BlockGenerator::generateScalarStores(ScopStmt &Stmt, BasicBlock *BB, - LoopToScevMapT <S, +void BlockGenerator::generateScalarStores(ScopStmt &Stmt, LoopToScevMapT <S, ValueMapT &BBMap) { const Region &R = Stmt.getParent()->getRegion(); - assert(Stmt.isBlockStmt() && BB == Stmt.getBasicBlock() && - "Region statements need to use the generateScalarStores() " - "function in the RegionGenerator"); + assert(Stmt.isBlockStmt() && "Region statements need to use the " + "generateScalarStores() function in the " + "RegionGenerator"); for (MemoryAccess *MA : Stmt) { if (MA->isExplicit() || MA->isRead()) @@ -990,6 +979,9 @@ void RegionGenerator::copyStmt(ScopStmt &Stmt, LoopToScevMapT <S, RegionMaps.clear(); IncompletePHINodeMap.clear(); + // Collection of all values related to this subregion. + ValueMapT ValueMap; + // The region represented by the statement. Region *R = Stmt.getRegion(); @@ -1001,6 +993,8 @@ void RegionGenerator::copyStmt(ScopStmt &Stmt, LoopToScevMapT <S, EntryBBCopy->setName("polly.stmt." + EntryBB->getName() + ".entry"); Builder.SetInsertPoint(EntryBBCopy->begin()); + generateScalarLoads(Stmt, RegionMaps[EntryBBCopy]); + for (auto PI = pred_begin(EntryBB), PE = pred_end(EntryBB); PI != PE; ++PI) if (!R->contains(*PI)) BlockMap[*PI] = EntryBBCopy; @@ -1025,9 +1019,11 @@ void RegionGenerator::copyStmt(ScopStmt &Stmt, LoopToScevMapT <S, // Get the mapping for this block and initialize it with the mapping // available at its immediate dominator (in the new region). ValueMapT &RegionMap = RegionMaps[BBCopy]; - RegionMap = RegionMaps[BBCopyIDom]; + if (BBCopy != EntryBBCopy) + RegionMap = RegionMaps[BBCopyIDom]; // Copy the block with the BlockGenerator. + Builder.SetInsertPoint(BBCopy->begin()); copyBB(Stmt, BB, BBCopy, RegionMap, LTS, IdToAstExp); // In order to remap PHI nodes we store also basic block mappings. @@ -1042,6 +1038,9 @@ void RegionGenerator::copyStmt(ScopStmt &Stmt, LoopToScevMapT <S, for (auto SI = succ_begin(BB), SE = succ_end(BB); SI != SE; SI++) if (R->contains(*SI) && SeenBlocks.insert(*SI).second) Blocks.push_back(*SI); + + // Remember value in case it is visible after this subregion. + ValueMap.insert(RegionMap.begin(), RegionMap.end()); } // Now create a new dedicated region exit block and add it to the region map. @@ -1107,26 +1106,14 @@ void RegionGenerator::copyStmt(ScopStmt &Stmt, LoopToScevMapT <S, LTS[L] = SE.getUnknown(LoopPHI); } - // Reset the old insert point for the build. - Builder.SetInsertPoint(ExitBBCopy->begin()); -} - -void RegionGenerator::generateScalarLoads(ScopStmt &Stmt, - const Instruction *Inst, - ValueMapT &BBMap) { - - // Inside a non-affine region PHI nodes are copied not demoted. Once the - // phi is copied it will reload all inputs from outside the region, hence - // we do not need to generate code for the read access of the operands of a - // PHI. - if (isa(Inst)) - return; + // Continue generating code in the exit block. + Builder.SetInsertPoint(ExitBBCopy->getFirstInsertionPt()); - return BlockGenerator::generateScalarLoads(Stmt, Inst, BBMap); + // Write values visible to other statements. + generateScalarStores(Stmt, LTS, ValueMap); } -void RegionGenerator::generateScalarStores(ScopStmt &Stmt, BasicBlock *BB, - LoopToScevMapT <S, +void RegionGenerator::generateScalarStores(ScopStmt &Stmt, LoopToScevMapT <S, ValueMapT &BBMap) { const Region &R = Stmt.getParent()->getRegion(); @@ -1135,22 +1122,31 @@ void RegionGenerator::generateScalarStores(ScopStmt &Stmt, BasicBlock *BB, "function in the BlockGenerator"); for (MemoryAccess *MA : Stmt) { - if (MA->isExplicit() || MA->isRead()) continue; Instruction *ScalarInst = MA->getAccessInstruction(); + Value *Val = MA->getAccessValue(); - // Only generate accesses that belong to this basic block. - if (ScalarInst->getParent() != BB) - continue; + // In case we add the store into an exiting block, we need to restore the + // position for stores in the exit node. + auto SavedInsertionPoint = Builder.GetInsertPoint(); - Value *Val = MA->getAccessValue(); + // Implicit writes induced by PHIs must be written in the incoming blocks. + if (isa(ScalarInst)) { + BasicBlock *ExitingBB = ScalarInst->getParent(); + BasicBlock *ExitingBBCopy = BlockMap[ExitingBB]; + Builder.SetInsertPoint(ExitingBBCopy->getTerminator()); + } auto Address = getOrCreateAlloca(*MA); Val = getNewScalarValue(Val, R, Stmt, LTS, BBMap); Builder.CreateStore(Val, Address); + + // Restore the insertion point if necessary. + if (isa(ScalarInst)) + Builder.SetInsertPoint(SavedInsertionPoint); } } diff --git a/polly/test/Isl/CodeGen/non-affine-phi-node-expansion-2.ll b/polly/test/Isl/CodeGen/non-affine-phi-node-expansion-2.ll index 9cc1d5e1c5d..64cb7fab8dd 100644 --- a/polly/test/Isl/CodeGen/non-affine-phi-node-expansion-2.ll +++ b/polly/test/Isl/CodeGen/non-affine-phi-node-expansion-2.ll @@ -4,7 +4,7 @@ target datalayout = "e-m:e-i64:64-f80:128-n8:16:32:64-S128" ; CHECK: polly.stmt.bb3: ; preds = %polly.stmt.bb3.entry -; CHECK: %tmp6_p_scalar_ = load double, double* %arg11, !alias.scope !0, !noalias !2 +; CHECK: %tmp6_p_scalar_ = load double, double* %arg1{{[0-9]*}}, !alias.scope !0, !noalias !2 ; CHECK: %p_tmp7 = fadd double 1.000000e+00, %tmp6_p_scalar_ ; CHECK: %p_tmp8 = fcmp olt double 1.400000e+01, %p_tmp7 ; CHECK: br i1 %p_tmp8, label %polly.stmt.bb9, label %polly.stmt.bb10 diff --git a/polly/test/Isl/CodeGen/non-affine-phi-node-expansion-3.ll b/polly/test/Isl/CodeGen/non-affine-phi-node-expansion-3.ll index a560f518aec..8cbbc227636 100644 --- a/polly/test/Isl/CodeGen/non-affine-phi-node-expansion-3.ll +++ b/polly/test/Isl/CodeGen/non-affine-phi-node-expansion-3.ll @@ -17,8 +17,8 @@ loop: ; CHECK-NEXT: %p_val0 = fadd float 1.000000e+00, 2.000000e+00 ; CHECK-NEXT: %p_val1 = fadd float 1.000000e+00, 2.000000e+00 ; CHECK-NEXT: %p_val2 = fadd float 1.000000e+00, 2.000000e+00 -; CHECK-NEXT: store float %p_val0, float* %merge.phiops ; CHECK-NEXT: %polly.subregion.iv.inc = add i32 %polly.subregion.iv, 1 +; CHECK-NEXT: store float %p_val0, float* %merge.phiops ; CHECK-NEXT: br i1 branch1: diff --git a/polly/test/Isl/CodeGen/non-affine-phi-node-expansion-4.ll b/polly/test/Isl/CodeGen/non-affine-phi-node-expansion-4.ll index 955d4346c95..2ad52d180e4 100644 --- a/polly/test/Isl/CodeGen/non-affine-phi-node-expansion-4.ll +++ b/polly/test/Isl/CodeGen/non-affine-phi-node-expansion-4.ll @@ -15,8 +15,8 @@ loop: ; CHECK-NEXT: %polly.subregion.iv = phi i32 [ 0, %polly.stmt.loop.entry ] ; CHECK-NEXT: %p_val0 = fadd float 1.000000e+00, 2.000000e+00 ; CHECK-NEXT: %p_val1 = fadd float 1.000000e+00, 2.000000e+00 -; CHECK-NEXT: store float %p_val0, float* %merge.phiops ; CHECK-NEXT: %polly.subregion.iv.inc = add i32 %polly.subregion.iv, 1 +; CHECK-NEXT: store float %p_val0, float* %merge.phiops ; CHECK-NEXT: br i1 ; The interesting instruction here is %val2, which does not dominate the exit of diff --git a/polly/test/Isl/CodeGen/phi_loop_carried_float.ll b/polly/test/Isl/CodeGen/phi_loop_carried_float.ll index 4e9dca0cce5..f191279ae2c 100644 --- a/polly/test/Isl/CodeGen/phi_loop_carried_float.ll +++ b/polly/test/Isl/CodeGen/phi_loop_carried_float.ll @@ -28,8 +28,8 @@ ; CHECK: store float %tmp.0.phiops.reload[[R1]], float* %tmp.0.s2a ; CHECK-LABEL: polly.stmt.bb4: -; CHECK: %tmp[[R5:[0-9]*]]_p_scalar_ = load float, float* %scevgep, align 4, !alias.scope !0, !noalias !2 ; CHECK: %tmp.0.s2a.reload[[R3:[0-9]*]] = load float, float* %tmp.0.s2a +; CHECK: %tmp[[R5:[0-9]*]]_p_scalar_ = load float, float* %scevgep, align 4, !alias.scope !0, !noalias !2 ; CHECK: %p_tmp[[R4:[0-9]*]] = fadd float %tmp.0.s2a.reload[[R3]], %tmp[[R5]]_p_scalar_ ; CHECK: store float %p_tmp[[R4]], float* %tmp.0.phiops diff --git a/polly/test/Isl/CodeGen/phi_loop_carried_float_escape.ll b/polly/test/Isl/CodeGen/phi_loop_carried_float_escape.ll index 0129c3bd032..11d3697202a 100644 --- a/polly/test/Isl/CodeGen/phi_loop_carried_float_escape.ll +++ b/polly/test/Isl/CodeGen/phi_loop_carried_float_escape.ll @@ -28,8 +28,8 @@ ; CHECK-: store float %tmp.0.phiops.reload[[R1]], float* %tmp.0.s2a ; CHECK-LABEL: polly.stmt.bb4: -; CHECK: %tmp[[R5:[0-9]*]]_p_scalar_ = load float, float* %scevgep, align 4, !alias.scope !0, !noalias !2 ; CHECK: %tmp.0.s2a.reload[[R3:[0-9]*]] = load float, float* %tmp.0.s2a +; CHECK: %tmp[[R5:[0-9]*]]_p_scalar_ = load float, float* %scevgep, align 4, !alias.scope !0, !noalias !2 ; CHECK: %p_tmp[[R4:[0-9]*]] = fadd float %tmp.0.s2a.reload[[R3]], %tmp[[R5]]_p_scalar_ ; CHECK: store float %p_tmp[[R4]], float* %tmp.0.phiops diff --git a/polly/test/Isl/CodeGen/read-only-scalars.ll b/polly/test/Isl/CodeGen/read-only-scalars.ll index 3b37b8b63b4..f27ec6bfdfe 100644 --- a/polly/test/Isl/CodeGen/read-only-scalars.ll +++ b/polly/test/Isl/CodeGen/read-only-scalars.ll @@ -14,8 +14,8 @@ ; SCALAR-NEXT: store float %scalar, float* %scalar.s2a ; SCALAR-LABEL: polly.stmt.stmt1: -; SCALAR-NEXT: %val_p_scalar_ = load float, float* %A, ; SCALAR-NEXT: %scalar.s2a.reload = load float, float* %scalar.s2a +; SCALAR-NEXT: %val_p_scalar_ = load float, float* %A, ; SCALAR-NEXT: %p_sum = fadd float %val_p_scalar_, %scalar.s2a.reload define void @foo(float* noalias %A, float %scalar) { -- cgit v1.2.3