diff options
Diffstat (limited to 'llvm/lib/Transforms/Vectorize/SLPVectorizer.cpp')
-rw-r--r-- | llvm/lib/Transforms/Vectorize/SLPVectorizer.cpp | 141 |
1 files changed, 66 insertions, 75 deletions
diff --git a/llvm/lib/Transforms/Vectorize/SLPVectorizer.cpp b/llvm/lib/Transforms/Vectorize/SLPVectorizer.cpp index 71b0abbf739..12c114f83d4 100644 --- a/llvm/lib/Transforms/Vectorize/SLPVectorizer.cpp +++ b/llvm/lib/Transforms/Vectorize/SLPVectorizer.cpp @@ -1121,14 +1121,6 @@ public: #endif }; - /// Checks if the instruction is marked for deletion. - bool isDeleted(Instruction *I) const { return DeletedInstructions.count(I); } - - /// Marks values for later deletion. - void eraseInstructions(ArrayRef<Value *> AV); - - ~BoUpSLP(); - private: /// Checks if all users of \p I are the part of the vectorization tree. bool areAllUsersVectorized(Instruction *I) const; @@ -1499,12 +1491,14 @@ private: /// AliasCache, which can happen if a new instruction is allocated at the /// same address as a previously deleted instruction. void eraseInstruction(Instruction *I) { - DeletedInstructions.insert(I); + I->removeFromParent(); + I->dropAllReferences(); + DeletedInstructions.emplace_back(I); } /// Temporary store for deleted instructions. Instructions will be deleted /// eventually when the BoUpSLP is destructed. - SmallPtrSet<Instruction *, 8> DeletedInstructions; + SmallVector<unique_value, 8> DeletedInstructions; /// A list of values that need to extracted out of the tree. /// This list holds pairs of (Internal Scalar : External User). External User @@ -2061,22 +2055,6 @@ template <> struct DOTGraphTraits<BoUpSLP *> : public DefaultDOTGraphTraits { } // end namespace llvm -BoUpSLP::~BoUpSLP() { - for (auto *I : DeletedInstructions) - I->dropAllReferences(); - for (auto *I : DeletedInstructions) { - assert(I->use_empty() && "trying to erase instruction with users."); - I->eraseFromParent(); - } -} - -void BoUpSLP::eraseInstructions(ArrayRef<Value *> AV) { - for (auto *V : AV) { - if (auto *I = dyn_cast<Instruction>(V)) - eraseInstruction(I); - }; -} - void BoUpSLP::buildTree(ArrayRef<Value *> Roots, ArrayRef<Value *> UserIgnoreLst) { ExtraValueToDebugLocsMap ExternallyUsedValues; @@ -3563,7 +3541,7 @@ Value *BoUpSLP::Gather(ArrayRef<Value *> VL, VectorType *Ty) { // Generate the 'InsertElement' instruction. for (unsigned i = 0; i < Ty->getNumElements(); ++i) { Vec = Builder.CreateInsertElement(Vec, VL[i], Builder.getInt32(i)); - if (auto *Insrt = dyn_cast<InsertElementInst>(Vec)) { + if (Instruction *Insrt = dyn_cast<Instruction>(Vec)) { GatherSeq.insert(Insrt); CSEBlocks.insert(Insrt->getParent()); @@ -4312,18 +4290,20 @@ BoUpSLP::vectorizeTree(ExtraValueToDebugLocsMap &ExternallyUsedValues) { for (int Lane = 0, LE = Entry->Scalars.size(); Lane != LE; ++Lane) { Value *Scalar = Entry->Scalars[Lane]; -#ifndef NDEBUG Type *Ty = Scalar->getType(); if (!Ty->isVoidTy()) { +#ifndef NDEBUG for (User *U : Scalar->users()) { LLVM_DEBUG(dbgs() << "SLP: \tvalidating user:" << *U << ".\n"); - // It is legal to delete users in the ignorelist. + // It is legal to replace users in the ignorelist by undef. assert((getTreeEntry(U) || is_contained(UserIgnoreList, U)) && - "Deleting out-of-tree value"); + "Replacing out-of-tree value with undef"); } - } #endif + Value *Undef = UndefValue::get(Ty); + Scalar->replaceAllUsesWith(Undef); + } LLVM_DEBUG(dbgs() << "SLP: \tErasing scalar:" << *Scalar << ".\n"); eraseInstruction(cast<Instruction>(Scalar)); } @@ -4339,7 +4319,7 @@ void BoUpSLP::optimizeGatherSequence() { << " gather sequences instructions.\n"); // LICM InsertElementInst sequences. for (Instruction *I : GatherSeq) { - if (isDeleted(I)) + if (!isa<InsertElementInst>(I) && !isa<ShuffleVectorInst>(I)) continue; // Check if this block is inside a loop. @@ -4393,8 +4373,6 @@ void BoUpSLP::optimizeGatherSequence() { // For all instructions in blocks containing gather sequences: for (BasicBlock::iterator it = BB->begin(), e = BB->end(); it != e;) { Instruction *In = &*it++; - if (isDeleted(In)) - continue; if (!isa<InsertElementInst>(In) && !isa<ExtractElementInst>(In)) continue; @@ -5277,6 +5255,19 @@ bool SLPVectorizerPass::runImpl(Function &F, ScalarEvolution *SE_, return Changed; } +/// Check that the Values in the slice in VL array are still existent in +/// the WeakTrackingVH array. +/// Vectorization of part of the VL array may cause later values in the VL array +/// to become invalid. We track when this has happened in the WeakTrackingVH +/// array. +static bool hasValueBeenRAUWed(ArrayRef<Value *> VL, + ArrayRef<WeakTrackingVH> VH, unsigned SliceBegin, + unsigned SliceSize) { + VL = VL.slice(SliceBegin, SliceSize); + VH = VH.slice(SliceBegin, SliceSize); + return !std::equal(VL.begin(), VL.end(), VH.begin()); +} + bool SLPVectorizerPass::vectorizeStoreChain(ArrayRef<Value *> Chain, BoUpSLP &R, unsigned VecRegSize) { const unsigned ChainLen = Chain.size(); @@ -5288,20 +5279,20 @@ bool SLPVectorizerPass::vectorizeStoreChain(ArrayRef<Value *> Chain, BoUpSLP &R, if (!isPowerOf2_32(Sz) || VF < 2) return false; + // Keep track of values that were deleted by vectorizing in the loop below. + const SmallVector<WeakTrackingVH, 8> TrackValues(Chain.begin(), Chain.end()); + bool Changed = false; // Look for profitable vectorizable trees at all offsets, starting at zero. for (unsigned i = 0, e = ChainLen; i + VF <= e; ++i) { - ArrayRef<Value *> Operands = Chain.slice(i, VF); // Check that a previous iteration of this loop did not delete the Value. - if (llvm::any_of(Operands, [&R](Value *V) { - auto *I = dyn_cast<Instruction>(V); - return I && R.isDeleted(I); - })) + if (hasValueBeenRAUWed(Chain, TrackValues, i, VF)) continue; LLVM_DEBUG(dbgs() << "SLP: Analyzing " << VF << " stores at offset " << i << "\n"); + ArrayRef<Value *> Operands = Chain.slice(i, VF); R.buildTree(Operands); if (R.isTreeTinyAndNotFullyVectorizable()) @@ -5493,6 +5484,9 @@ bool SLPVectorizerPass::tryToVectorizeList(ArrayRef<Value *> VL, BoUpSLP &R, bool CandidateFound = false; int MinCost = SLPCostThreshold; + // Keep track of values that were deleted by vectorizing in the loop below. + SmallVector<WeakTrackingVH, 8> TrackValues(VL.begin(), VL.end()); + unsigned NextInst = 0, MaxInst = VL.size(); for (unsigned VF = MaxVF; NextInst + 1 < MaxInst && VF >= MinVF; VF /= 2) { // No actual vectorization should happen, if number of parts is the same as @@ -5512,16 +5506,13 @@ bool SLPVectorizerPass::tryToVectorizeList(ArrayRef<Value *> VL, BoUpSLP &R, if (!isPowerOf2_32(OpsWidth) || OpsWidth < 2) break; - ArrayRef<Value *> Ops = VL.slice(I, OpsWidth); // Check that a previous iteration of this loop did not delete the Value. - if (llvm::any_of(Ops, [&R](Value *V) { - auto *I = dyn_cast<Instruction>(V); - return I && R.isDeleted(I); - })) + if (hasValueBeenRAUWed(VL, TrackValues, I, OpsWidth)) continue; LLVM_DEBUG(dbgs() << "SLP: Analyzing " << OpsWidth << " operations " << "\n"); + ArrayRef<Value *> Ops = VL.slice(I, OpsWidth); R.buildTree(Ops); Optional<ArrayRef<unsigned>> Order = R.bestOrder(); @@ -5742,23 +5733,23 @@ class HorizontalReduction { case RK_Min: Cmp = Opcode == Instruction::ICmp ? Builder.CreateICmpSLT(LHS, RHS) : Builder.CreateFCmpOLT(LHS, RHS); - return Builder.CreateSelect(Cmp, LHS, RHS, Name); + break; case RK_Max: Cmp = Opcode == Instruction::ICmp ? Builder.CreateICmpSGT(LHS, RHS) : Builder.CreateFCmpOGT(LHS, RHS); - return Builder.CreateSelect(Cmp, LHS, RHS, Name); + break; case RK_UMin: assert(Opcode == Instruction::ICmp && "Expected integer types."); Cmp = Builder.CreateICmpULT(LHS, RHS); - return Builder.CreateSelect(Cmp, LHS, RHS, Name); + break; case RK_UMax: assert(Opcode == Instruction::ICmp && "Expected integer types."); Cmp = Builder.CreateICmpUGT(LHS, RHS); - return Builder.CreateSelect(Cmp, LHS, RHS, Name); - case RK_None: break; + case RK_None: + llvm_unreachable("Unknown reduction operation."); } - llvm_unreachable("Unknown reduction operation."); + return Builder.CreateSelect(Cmp, LHS, RHS, Name); } public: @@ -6438,9 +6429,6 @@ public: } // Update users. ReductionRoot->replaceAllUsesWith(VectorizedTree); - // Mark all scalar reduction ops for deletion, they are replaced by the - // vector reductions. - V.eraseInstructions(IgnoreList); } return VectorizedTree != nullptr; } @@ -6695,13 +6683,18 @@ static bool tryToVectorizeHorReductionOrInstOperands( // horizontal reduction. // Interrupt the process if the Root instruction itself was vectorized or all // sub-trees not higher that RecursionMaxDepth were analyzed/vectorized. - SmallVector<std::pair<Instruction *, unsigned>, 8> Stack(1, {Root, 0}); + SmallVector<std::pair<WeakTrackingVH, unsigned>, 8> Stack(1, {Root, 0}); SmallPtrSet<Value *, 8> VisitedInstrs; bool Res = false; while (!Stack.empty()) { - Instruction *Inst; + Value *V; unsigned Level; - std::tie(Inst, Level) = Stack.pop_back_val(); + std::tie(V, Level) = Stack.pop_back_val(); + if (!V) + continue; + auto *Inst = dyn_cast<Instruction>(V); + if (!Inst) + continue; auto *BI = dyn_cast<BinaryOperator>(Inst); auto *SI = dyn_cast<SelectInst>(Inst); if (BI || SI) { @@ -6742,8 +6735,8 @@ static bool tryToVectorizeHorReductionOrInstOperands( for (auto *Op : Inst->operand_values()) if (VisitedInstrs.insert(Op).second) if (auto *I = dyn_cast<Instruction>(Op)) - if (!isa<PHINode>(I) && !R.isDeleted(I) && I->getParent() == BB) - Stack.emplace_back(I, Level); + if (!isa<PHINode>(I) && I->getParent() == BB) + Stack.emplace_back(Op, Level); } return Res; } @@ -6812,10 +6805,11 @@ bool SLPVectorizerPass::vectorizeCmpInst(CmpInst *CI, BasicBlock *BB, } bool SLPVectorizerPass::vectorizeSimpleInstructions( - SmallVectorImpl<Instruction *> &Instructions, BasicBlock *BB, BoUpSLP &R) { + SmallVectorImpl<WeakVH> &Instructions, BasicBlock *BB, BoUpSLP &R) { bool OpsChanged = false; - for (auto *I : reverse(Instructions)) { - if (R.isDeleted(I)) + for (auto &VH : reverse(Instructions)) { + auto *I = dyn_cast_or_null<Instruction>(VH); + if (!I) continue; if (auto *LastInsertValue = dyn_cast<InsertValueInst>(I)) OpsChanged |= vectorizeInsertValueInst(LastInsertValue, BB, R); @@ -6844,7 +6838,7 @@ bool SLPVectorizerPass::vectorizeChainsInBlock(BasicBlock *BB, BoUpSLP &R) { if (!P) break; - if (!VisitedInstrs.count(P) && !R.isDeleted(P)) + if (!VisitedInstrs.count(P)) Incoming.push_back(P); } @@ -6888,12 +6882,9 @@ bool SLPVectorizerPass::vectorizeChainsInBlock(BasicBlock *BB, BoUpSLP &R) { VisitedInstrs.clear(); - SmallVector<Instruction *, 8> PostProcessInstructions; + SmallVector<WeakVH, 8> PostProcessInstructions; SmallDenseSet<Instruction *, 4> KeyNodes; for (BasicBlock::iterator it = BB->begin(), e = BB->end(); it != e; ++it) { - // Skip instructions marked for the deletion. - if (R.isDeleted(&*it)) - continue; // We may go through BB multiple times so skip the one we have checked. if (!VisitedInstrs.insert(&*it).second) { if (it->use_empty() && KeyNodes.count(&*it) > 0 && @@ -6986,10 +6977,10 @@ bool SLPVectorizerPass::vectorizeGEPIndices(BasicBlock *BB, BoUpSLP &R) { SetVector<Value *> Candidates(GEPList.begin(), GEPList.end()); // Some of the candidates may have already been vectorized after we - // initially collected them. If so, they are marked as deleted, so remove - // them from the set of candidates. - Candidates.remove_if( - [&R](Value *I) { return R.isDeleted(cast<Instruction>(I)); }); + // initially collected them. If so, the WeakTrackingVHs will have + // nullified the + // values, so remove them from the set of candidates. + Candidates.remove(nullptr); // Remove from the set of candidates all pairs of getelementptrs with // constant differences. Such getelementptrs are likely not good @@ -6997,18 +6988,18 @@ bool SLPVectorizerPass::vectorizeGEPIndices(BasicBlock *BB, BoUpSLP &R) { // computed from the other. We also ensure all candidate getelementptr // indices are unique. for (int I = 0, E = GEPList.size(); I < E && Candidates.size() > 1; ++I) { - auto *GEPI = GEPList[I]; + auto *GEPI = cast<GetElementPtrInst>(GEPList[I]); if (!Candidates.count(GEPI)) continue; auto *SCEVI = SE->getSCEV(GEPList[I]); for (int J = I + 1; J < E && Candidates.size() > 1; ++J) { - auto *GEPJ = GEPList[J]; + auto *GEPJ = cast<GetElementPtrInst>(GEPList[J]); auto *SCEVJ = SE->getSCEV(GEPList[J]); if (isa<SCEVConstant>(SE->getMinusSCEV(SCEVI, SCEVJ))) { - Candidates.remove(GEPI); - Candidates.remove(GEPJ); + Candidates.remove(GEPList[I]); + Candidates.remove(GEPList[J]); } else if (GEPI->idx_begin()->get() == GEPJ->idx_begin()->get()) { - Candidates.remove(GEPJ); + Candidates.remove(GEPList[J]); } } } |