diff options
Diffstat (limited to 'llvm/lib/Transforms/Vectorize/SLPVectorizer.cpp')
-rw-r--r-- | llvm/lib/Transforms/Vectorize/SLPVectorizer.cpp | 152 |
1 files changed, 85 insertions, 67 deletions
diff --git a/llvm/lib/Transforms/Vectorize/SLPVectorizer.cpp b/llvm/lib/Transforms/Vectorize/SLPVectorizer.cpp index 12c114f83d4..6be03d171b6 100644 --- a/llvm/lib/Transforms/Vectorize/SLPVectorizer.cpp +++ b/llvm/lib/Transforms/Vectorize/SLPVectorizer.cpp @@ -1121,6 +1121,14 @@ public: #endif }; + /// Checks if the instruction is marked for deletion. + bool isDeleted(Instruction *I) const { return DeletedInstructions.count(I); } + + /// Marks values operands for later deletion by replacing them with Undefs. + void eraseInstructions(ArrayRef<Value *> AV); + + ~BoUpSLP(); + private: /// Checks if all users of \p I are the part of the vectorization tree. bool areAllUsersVectorized(Instruction *I) const; @@ -1490,15 +1498,14 @@ private: /// This is required to ensure that there are no incorrect collisions in the /// AliasCache, which can happen if a new instruction is allocated at the /// same address as a previously deleted instruction. - void eraseInstruction(Instruction *I) { - I->removeFromParent(); - I->dropAllReferences(); - DeletedInstructions.emplace_back(I); + void eraseInstruction(Instruction *I, bool ReplaceOpsWithUndef = false) { + auto It = DeletedInstructions.try_emplace(I, ReplaceOpsWithUndef).first; + It->getSecond() = It->getSecond() && ReplaceOpsWithUndef; } /// Temporary store for deleted instructions. Instructions will be deleted /// eventually when the BoUpSLP is destructed. - SmallVector<unique_value, 8> DeletedInstructions; + DenseMap<Instruction *, bool> DeletedInstructions; /// A list of values that need to extracted out of the tree. /// This list holds pairs of (Internal Scalar : External User). External User @@ -2055,6 +2062,30 @@ template <> struct DOTGraphTraits<BoUpSLP *> : public DefaultDOTGraphTraits { } // end namespace llvm +BoUpSLP::~BoUpSLP() { + for (const auto &Pair : DeletedInstructions) { + // Replace operands of ignored instructions with Undefs in case if they were + // marked for deletion. + if (Pair.getSecond()) { + Value *Undef = UndefValue::get(Pair.getFirst()->getType()); + Pair.getFirst()->replaceAllUsesWith(Undef); + } + Pair.getFirst()->dropAllReferences(); + } + for (const auto &Pair : DeletedInstructions) { + assert(Pair.getFirst()->use_empty() && + "trying to erase instruction with users."); + Pair.getFirst()->eraseFromParent(); + } +} + +void BoUpSLP::eraseInstructions(ArrayRef<Value *> AV) { + for (auto *V : AV) { + if (auto *I = dyn_cast<Instruction>(V)) + eraseInstruction(I, /*ReplaceWithUndef=*/true); + }; +} + void BoUpSLP::buildTree(ArrayRef<Value *> Roots, ArrayRef<Value *> UserIgnoreLst) { ExtraValueToDebugLocsMap ExternallyUsedValues; @@ -3541,7 +3572,7 @@ Value *BoUpSLP::Gather(ArrayRef<Value *> VL, VectorType *Ty) { // Generate the 'InsertElement' instruction. for (unsigned i = 0; i < Ty->getNumElements(); ++i) { Vec = Builder.CreateInsertElement(Vec, VL[i], Builder.getInt32(i)); - if (Instruction *Insrt = dyn_cast<Instruction>(Vec)) { + if (auto *Insrt = dyn_cast<InsertElementInst>(Vec)) { GatherSeq.insert(Insrt); CSEBlocks.insert(Insrt->getParent()); @@ -4290,20 +4321,18 @@ BoUpSLP::vectorizeTree(ExtraValueToDebugLocsMap &ExternallyUsedValues) { for (int Lane = 0, LE = Entry->Scalars.size(); Lane != LE; ++Lane) { Value *Scalar = Entry->Scalars[Lane]; +#ifndef NDEBUG Type *Ty = Scalar->getType(); if (!Ty->isVoidTy()) { -#ifndef NDEBUG for (User *U : Scalar->users()) { LLVM_DEBUG(dbgs() << "SLP: \tvalidating user:" << *U << ".\n"); - // It is legal to replace users in the ignorelist by undef. + // It is legal to delete users in the ignorelist. assert((getTreeEntry(U) || is_contained(UserIgnoreList, U)) && - "Replacing out-of-tree value with undef"); + "Deleting out-of-tree value"); } -#endif - Value *Undef = UndefValue::get(Ty); - Scalar->replaceAllUsesWith(Undef); } +#endif LLVM_DEBUG(dbgs() << "SLP: \tErasing scalar:" << *Scalar << ".\n"); eraseInstruction(cast<Instruction>(Scalar)); } @@ -4319,7 +4348,7 @@ void BoUpSLP::optimizeGatherSequence() { << " gather sequences instructions.\n"); // LICM InsertElementInst sequences. for (Instruction *I : GatherSeq) { - if (!isa<InsertElementInst>(I) && !isa<ShuffleVectorInst>(I)) + if (isDeleted(I)) continue; // Check if this block is inside a loop. @@ -4373,6 +4402,8 @@ void BoUpSLP::optimizeGatherSequence() { // For all instructions in blocks containing gather sequences: for (BasicBlock::iterator it = BB->begin(), e = BB->end(); it != e;) { Instruction *In = &*it++; + if (isDeleted(In)) + continue; if (!isa<InsertElementInst>(In) && !isa<ExtractElementInst>(In)) continue; @@ -5255,19 +5286,6 @@ bool SLPVectorizerPass::runImpl(Function &F, ScalarEvolution *SE_, return Changed; } -/// Check that the Values in the slice in VL array are still existent in -/// the WeakTrackingVH array. -/// Vectorization of part of the VL array may cause later values in the VL array -/// to become invalid. We track when this has happened in the WeakTrackingVH -/// array. -static bool hasValueBeenRAUWed(ArrayRef<Value *> VL, - ArrayRef<WeakTrackingVH> VH, unsigned SliceBegin, - unsigned SliceSize) { - VL = VL.slice(SliceBegin, SliceSize); - VH = VH.slice(SliceBegin, SliceSize); - return !std::equal(VL.begin(), VL.end(), VH.begin()); -} - bool SLPVectorizerPass::vectorizeStoreChain(ArrayRef<Value *> Chain, BoUpSLP &R, unsigned VecRegSize) { const unsigned ChainLen = Chain.size(); @@ -5279,20 +5297,20 @@ bool SLPVectorizerPass::vectorizeStoreChain(ArrayRef<Value *> Chain, BoUpSLP &R, if (!isPowerOf2_32(Sz) || VF < 2) return false; - // Keep track of values that were deleted by vectorizing in the loop below. - const SmallVector<WeakTrackingVH, 8> TrackValues(Chain.begin(), Chain.end()); - bool Changed = false; // Look for profitable vectorizable trees at all offsets, starting at zero. for (unsigned i = 0, e = ChainLen; i + VF <= e; ++i) { + ArrayRef<Value *> Operands = Chain.slice(i, VF); // Check that a previous iteration of this loop did not delete the Value. - if (hasValueBeenRAUWed(Chain, TrackValues, i, VF)) + if (llvm::any_of(Operands, [&R](Value *V) { + auto *I = dyn_cast<Instruction>(V); + return I && R.isDeleted(I); + })) continue; LLVM_DEBUG(dbgs() << "SLP: Analyzing " << VF << " stores at offset " << i << "\n"); - ArrayRef<Value *> Operands = Chain.slice(i, VF); R.buildTree(Operands); if (R.isTreeTinyAndNotFullyVectorizable()) @@ -5484,9 +5502,6 @@ bool SLPVectorizerPass::tryToVectorizeList(ArrayRef<Value *> VL, BoUpSLP &R, bool CandidateFound = false; int MinCost = SLPCostThreshold; - // Keep track of values that were deleted by vectorizing in the loop below. - SmallVector<WeakTrackingVH, 8> TrackValues(VL.begin(), VL.end()); - unsigned NextInst = 0, MaxInst = VL.size(); for (unsigned VF = MaxVF; NextInst + 1 < MaxInst && VF >= MinVF; VF /= 2) { // No actual vectorization should happen, if number of parts is the same as @@ -5506,13 +5521,16 @@ bool SLPVectorizerPass::tryToVectorizeList(ArrayRef<Value *> VL, BoUpSLP &R, if (!isPowerOf2_32(OpsWidth) || OpsWidth < 2) break; + ArrayRef<Value *> Ops = VL.slice(I, OpsWidth); // Check that a previous iteration of this loop did not delete the Value. - if (hasValueBeenRAUWed(VL, TrackValues, I, OpsWidth)) + if (llvm::any_of(Ops, [&R](Value *V) { + auto *I = dyn_cast<Instruction>(V); + return I && R.isDeleted(I); + })) continue; LLVM_DEBUG(dbgs() << "SLP: Analyzing " << OpsWidth << " operations " << "\n"); - ArrayRef<Value *> Ops = VL.slice(I, OpsWidth); R.buildTree(Ops); Optional<ArrayRef<unsigned>> Order = R.bestOrder(); @@ -5733,23 +5751,23 @@ class HorizontalReduction { case RK_Min: Cmp = Opcode == Instruction::ICmp ? Builder.CreateICmpSLT(LHS, RHS) : Builder.CreateFCmpOLT(LHS, RHS); - break; + return Builder.CreateSelect(Cmp, LHS, RHS, Name); case RK_Max: Cmp = Opcode == Instruction::ICmp ? Builder.CreateICmpSGT(LHS, RHS) : Builder.CreateFCmpOGT(LHS, RHS); - break; + return Builder.CreateSelect(Cmp, LHS, RHS, Name); case RK_UMin: assert(Opcode == Instruction::ICmp && "Expected integer types."); Cmp = Builder.CreateICmpULT(LHS, RHS); - break; + return Builder.CreateSelect(Cmp, LHS, RHS, Name); case RK_UMax: assert(Opcode == Instruction::ICmp && "Expected integer types."); Cmp = Builder.CreateICmpUGT(LHS, RHS); - break; + return Builder.CreateSelect(Cmp, LHS, RHS, Name); case RK_None: - llvm_unreachable("Unknown reduction operation."); + break; } - return Builder.CreateSelect(Cmp, LHS, RHS, Name); + llvm_unreachable("Unknown reduction operation."); } public: @@ -6429,6 +6447,9 @@ public: } // Update users. ReductionRoot->replaceAllUsesWith(VectorizedTree); + // Mark all scalar reduction ops for deletion, they are replaced by the + // vector reductions. + V.eraseInstructions(IgnoreList); } return VectorizedTree != nullptr; } @@ -6683,18 +6704,13 @@ static bool tryToVectorizeHorReductionOrInstOperands( // horizontal reduction. // Interrupt the process if the Root instruction itself was vectorized or all // sub-trees not higher that RecursionMaxDepth were analyzed/vectorized. - SmallVector<std::pair<WeakTrackingVH, unsigned>, 8> Stack(1, {Root, 0}); + SmallVector<std::pair<Instruction *, unsigned>, 8> Stack(1, {Root, 0}); SmallPtrSet<Value *, 8> VisitedInstrs; bool Res = false; while (!Stack.empty()) { - Value *V; + Instruction *Inst; unsigned Level; - std::tie(V, Level) = Stack.pop_back_val(); - if (!V) - continue; - auto *Inst = dyn_cast<Instruction>(V); - if (!Inst) - continue; + std::tie(Inst, Level) = Stack.pop_back_val(); auto *BI = dyn_cast<BinaryOperator>(Inst); auto *SI = dyn_cast<SelectInst>(Inst); if (BI || SI) { @@ -6735,8 +6751,8 @@ static bool tryToVectorizeHorReductionOrInstOperands( for (auto *Op : Inst->operand_values()) if (VisitedInstrs.insert(Op).second) if (auto *I = dyn_cast<Instruction>(Op)) - if (!isa<PHINode>(I) && I->getParent() == BB) - Stack.emplace_back(Op, Level); + if (!isa<PHINode>(I) && !R.isDeleted(I) && I->getParent() == BB) + Stack.emplace_back(I, Level); } return Res; } @@ -6805,11 +6821,10 @@ bool SLPVectorizerPass::vectorizeCmpInst(CmpInst *CI, BasicBlock *BB, } bool SLPVectorizerPass::vectorizeSimpleInstructions( - SmallVectorImpl<WeakVH> &Instructions, BasicBlock *BB, BoUpSLP &R) { + SmallVectorImpl<Instruction *> &Instructions, BasicBlock *BB, BoUpSLP &R) { bool OpsChanged = false; - for (auto &VH : reverse(Instructions)) { - auto *I = dyn_cast_or_null<Instruction>(VH); - if (!I) + for (auto *I : reverse(Instructions)) { + if (R.isDeleted(I)) continue; if (auto *LastInsertValue = dyn_cast<InsertValueInst>(I)) OpsChanged |= vectorizeInsertValueInst(LastInsertValue, BB, R); @@ -6838,7 +6853,7 @@ bool SLPVectorizerPass::vectorizeChainsInBlock(BasicBlock *BB, BoUpSLP &R) { if (!P) break; - if (!VisitedInstrs.count(P)) + if (!VisitedInstrs.count(P) && !R.isDeleted(P)) Incoming.push_back(P); } @@ -6882,9 +6897,12 @@ bool SLPVectorizerPass::vectorizeChainsInBlock(BasicBlock *BB, BoUpSLP &R) { VisitedInstrs.clear(); - SmallVector<WeakVH, 8> PostProcessInstructions; + SmallVector<Instruction *, 8> PostProcessInstructions; SmallDenseSet<Instruction *, 4> KeyNodes; for (BasicBlock::iterator it = BB->begin(), e = BB->end(); it != e; ++it) { + // Skip instructions marked for the deletion. + if (R.isDeleted(&*it)) + continue; // We may go through BB multiple times so skip the one we have checked. if (!VisitedInstrs.insert(&*it).second) { if (it->use_empty() && KeyNodes.count(&*it) > 0 && @@ -6977,10 +6995,10 @@ bool SLPVectorizerPass::vectorizeGEPIndices(BasicBlock *BB, BoUpSLP &R) { SetVector<Value *> Candidates(GEPList.begin(), GEPList.end()); // Some of the candidates may have already been vectorized after we - // initially collected them. If so, the WeakTrackingVHs will have - // nullified the - // values, so remove them from the set of candidates. - Candidates.remove(nullptr); + // initially collected them. If so, they are marked as deleted, so remove + // them from the set of candidates. + Candidates.remove_if( + [&R](Value *I) { return R.isDeleted(cast<Instruction>(I)); }); // Remove from the set of candidates all pairs of getelementptrs with // constant differences. Such getelementptrs are likely not good @@ -6988,18 +7006,18 @@ bool SLPVectorizerPass::vectorizeGEPIndices(BasicBlock *BB, BoUpSLP &R) { // computed from the other. We also ensure all candidate getelementptr // indices are unique. for (int I = 0, E = GEPList.size(); I < E && Candidates.size() > 1; ++I) { - auto *GEPI = cast<GetElementPtrInst>(GEPList[I]); + auto *GEPI = GEPList[I]; if (!Candidates.count(GEPI)) continue; auto *SCEVI = SE->getSCEV(GEPList[I]); for (int J = I + 1; J < E && Candidates.size() > 1; ++J) { - auto *GEPJ = cast<GetElementPtrInst>(GEPList[J]); + auto *GEPJ = GEPList[J]; auto *SCEVJ = SE->getSCEV(GEPList[J]); if (isa<SCEVConstant>(SE->getMinusSCEV(SCEVI, SCEVJ))) { - Candidates.remove(GEPList[I]); - Candidates.remove(GEPList[J]); + Candidates.remove(GEPI); + Candidates.remove(GEPJ); } else if (GEPI->idx_begin()->get() == GEPJ->idx_begin()->get()) { - Candidates.remove(GEPList[J]); + Candidates.remove(GEPJ); } } } |