diff options
Diffstat (limited to 'llvm/lib/Transforms/Vectorize/LoadStoreVectorizer.cpp')
-rw-r--r-- | llvm/lib/Transforms/Vectorize/LoadStoreVectorizer.cpp | 77 |
1 files changed, 62 insertions, 15 deletions
diff --git a/llvm/lib/Transforms/Vectorize/LoadStoreVectorizer.cpp b/llvm/lib/Transforms/Vectorize/LoadStoreVectorizer.cpp index b56e731c991..719df55347a 100644 --- a/llvm/lib/Transforms/Vectorize/LoadStoreVectorizer.cpp +++ b/llvm/lib/Transforms/Vectorize/LoadStoreVectorizer.cpp @@ -97,8 +97,16 @@ static const unsigned StackAdjustedAlignment = 4; namespace { +/// ChainID is an arbitrary token that is allowed to be different only for the +/// accesses that are guaranteed to be considered non-consecutive by +/// Vectorizer::isConsecutiveAccess. It's used for grouping instructions +/// together and reducing the number of instructions the main search operates on +/// at a time, i.e. this is to reduce compile time and nothing else as the main +/// search has O(n^2) time complexity. The underlying type of ChainID should not +/// be relied upon. +using ChainID = const Value *; using InstrList = SmallVector<Instruction *, 8>; -using InstrListMap = MapVector<Value *, InstrList>; +using InstrListMap = MapVector<ChainID, InstrList>; class Vectorizer { Function &F; @@ -136,9 +144,15 @@ private: return DL.getABITypeAlignment(SI->getValueOperand()->getType()); } + static const unsigned MaxDepth = 3; + bool isConsecutiveAccess(Value *A, Value *B); - bool areConsecutivePointers(Value *PtrA, Value *PtrB, APInt Size); - bool lookThroughComplexAddresses(Value *PtrA, Value *PtrB, APInt PtrDelta); + bool areConsecutivePointers(Value *PtrA, Value *PtrB, APInt PtrDelta, + unsigned Depth = 0) const; + bool lookThroughComplexAddresses(Value *PtrA, Value *PtrB, APInt PtrDelta, + unsigned Depth) const; + bool lookThroughSelects(Value *PtrA, Value *PtrB, APInt PtrDelta, + unsigned Depth) const; /// After vectorization, reorder the instructions that I depends on /// (the instructions defining its operands), to ensure they dominate I. @@ -304,7 +318,8 @@ bool Vectorizer::isConsecutiveAccess(Value *A, Value *B) { return areConsecutivePointers(PtrA, PtrB, Size); } -bool Vectorizer::areConsecutivePointers(Value *PtrA, Value *PtrB, APInt Size) { +bool Vectorizer::areConsecutivePointers(Value *PtrA, Value *PtrB, + APInt PtrDelta, unsigned Depth) const { unsigned PtrBitWidth = DL.getPointerTypeSizeInBits(PtrA->getType()); APInt OffsetA(PtrBitWidth, 0); APInt OffsetB(PtrBitWidth, 0); @@ -316,11 +331,11 @@ bool Vectorizer::areConsecutivePointers(Value *PtrA, Value *PtrB, APInt Size) { // Check if they are based on the same pointer. That makes the offsets // sufficient. if (PtrA == PtrB) - return OffsetDelta == Size; + return OffsetDelta == PtrDelta; // Compute the necessary base pointer delta to have the necessary final delta - // equal to the size. - APInt BaseDelta = Size - OffsetDelta; + // equal to the pointer delta requested. + APInt BaseDelta = PtrDelta - OffsetDelta; // Compute the distance with SCEV between the base pointers. const SCEV *PtrSCEVA = SE.getSCEV(PtrA); @@ -341,15 +356,16 @@ bool Vectorizer::areConsecutivePointers(Value *PtrA, Value *PtrB, APInt Size) { // Sometimes even this doesn't work, because SCEV can't always see through // patterns that look like (gep (ext (add (shl X, C1), C2))). Try checking // things the hard way. - return lookThroughComplexAddresses(PtrA, PtrB, BaseDelta); + return lookThroughComplexAddresses(PtrA, PtrB, BaseDelta, Depth); } bool Vectorizer::lookThroughComplexAddresses(Value *PtrA, Value *PtrB, - APInt PtrDelta) { + APInt PtrDelta, + unsigned Depth) const { auto *GEPA = dyn_cast<GetElementPtrInst>(PtrA); auto *GEPB = dyn_cast<GetElementPtrInst>(PtrB); if (!GEPA || !GEPB) - return false; + return lookThroughSelects(PtrA, PtrB, PtrDelta, Depth); // Look through GEPs after checking they're the same except for the last // index. @@ -434,6 +450,23 @@ bool Vectorizer::lookThroughComplexAddresses(Value *PtrA, Value *PtrB, return X == OffsetSCEVB; } +bool Vectorizer::lookThroughSelects(Value *PtrA, Value *PtrB, APInt PtrDelta, + unsigned Depth) const { + if (Depth++ == MaxDepth) + return false; + + if (auto *SelectA = dyn_cast<SelectInst>(PtrA)) { + if (auto *SelectB = dyn_cast<SelectInst>(PtrB)) { + return SelectA->getCondition() == SelectB->getCondition() && + areConsecutivePointers(SelectA->getTrueValue(), + SelectB->getTrueValue(), PtrDelta, Depth) && + areConsecutivePointers(SelectA->getFalseValue(), + SelectB->getFalseValue(), PtrDelta, Depth); + } + } + return false; +} + void Vectorizer::reorder(Instruction *I) { OrderedBasicBlock OBB(I->getParent()); SmallPtrSet<Instruction *, 16> InstructionsToMove; @@ -656,6 +689,20 @@ Vectorizer::getVectorizablePrefix(ArrayRef<Instruction *> Chain) { return Chain.slice(0, ChainIdx); } +static ChainID getChainID(const Value *Ptr, const DataLayout &DL) { + const Value *ObjPtr = GetUnderlyingObject(Ptr, DL); + if (const auto *Sel = dyn_cast<SelectInst>(ObjPtr)) { + // The select's themselves are distinct instructions even if they share the + // same condition and evaluate to consecutive pointers for true and false + // values of the condition. Therefore using the select's themselves for + // grouping instructions would put consecutive accesses into different lists + // and they won't be even checked for being consecutive, and won't be + // vectorized. + return Sel->getCondition(); + } + return ObjPtr; +} + std::pair<InstrListMap, InstrListMap> Vectorizer::collectInstructions(BasicBlock *BB) { InstrListMap LoadRefs; @@ -710,8 +757,8 @@ Vectorizer::collectInstructions(BasicBlock *BB) { continue; // Save the load locations. - Value *ObjPtr = GetUnderlyingObject(Ptr, DL); - LoadRefs[ObjPtr].push_back(LI); + const ChainID ID = getChainID(Ptr, DL); + LoadRefs[ID].push_back(LI); } else if (StoreInst *SI = dyn_cast<StoreInst>(&I)) { if (!SI->isSimple()) continue; @@ -756,8 +803,8 @@ Vectorizer::collectInstructions(BasicBlock *BB) { continue; // Save store location. - Value *ObjPtr = GetUnderlyingObject(Ptr, DL); - StoreRefs[ObjPtr].push_back(SI); + const ChainID ID = getChainID(Ptr, DL); + StoreRefs[ID].push_back(SI); } } @@ -767,7 +814,7 @@ Vectorizer::collectInstructions(BasicBlock *BB) { bool Vectorizer::vectorizeChains(InstrListMap &Map) { bool Changed = false; - for (const std::pair<Value *, InstrList> &Chain : Map) { + for (const std::pair<ChainID, InstrList> &Chain : Map) { unsigned Size = Chain.second.size(); if (Size < 2) continue; |