summaryrefslogtreecommitdiffstats
path: root/llvm/lib/Transforms/Vectorize/LoadStoreVectorizer.cpp
diff options
context:
space:
mode:
Diffstat (limited to 'llvm/lib/Transforms/Vectorize/LoadStoreVectorizer.cpp')
-rw-r--r--llvm/lib/Transforms/Vectorize/LoadStoreVectorizer.cpp77
1 files changed, 62 insertions, 15 deletions
diff --git a/llvm/lib/Transforms/Vectorize/LoadStoreVectorizer.cpp b/llvm/lib/Transforms/Vectorize/LoadStoreVectorizer.cpp
index b56e731c991..719df55347a 100644
--- a/llvm/lib/Transforms/Vectorize/LoadStoreVectorizer.cpp
+++ b/llvm/lib/Transforms/Vectorize/LoadStoreVectorizer.cpp
@@ -97,8 +97,16 @@ static const unsigned StackAdjustedAlignment = 4;
namespace {
+/// ChainID is an arbitrary token that is allowed to be different only for the
+/// accesses that are guaranteed to be considered non-consecutive by
+/// Vectorizer::isConsecutiveAccess. It's used for grouping instructions
+/// together and reducing the number of instructions the main search operates on
+/// at a time, i.e. this is to reduce compile time and nothing else as the main
+/// search has O(n^2) time complexity. The underlying type of ChainID should not
+/// be relied upon.
+using ChainID = const Value *;
using InstrList = SmallVector<Instruction *, 8>;
-using InstrListMap = MapVector<Value *, InstrList>;
+using InstrListMap = MapVector<ChainID, InstrList>;
class Vectorizer {
Function &F;
@@ -136,9 +144,15 @@ private:
return DL.getABITypeAlignment(SI->getValueOperand()->getType());
}
+ static const unsigned MaxDepth = 3;
+
bool isConsecutiveAccess(Value *A, Value *B);
- bool areConsecutivePointers(Value *PtrA, Value *PtrB, APInt Size);
- bool lookThroughComplexAddresses(Value *PtrA, Value *PtrB, APInt PtrDelta);
+ bool areConsecutivePointers(Value *PtrA, Value *PtrB, APInt PtrDelta,
+ unsigned Depth = 0) const;
+ bool lookThroughComplexAddresses(Value *PtrA, Value *PtrB, APInt PtrDelta,
+ unsigned Depth) const;
+ bool lookThroughSelects(Value *PtrA, Value *PtrB, APInt PtrDelta,
+ unsigned Depth) const;
/// After vectorization, reorder the instructions that I depends on
/// (the instructions defining its operands), to ensure they dominate I.
@@ -304,7 +318,8 @@ bool Vectorizer::isConsecutiveAccess(Value *A, Value *B) {
return areConsecutivePointers(PtrA, PtrB, Size);
}
-bool Vectorizer::areConsecutivePointers(Value *PtrA, Value *PtrB, APInt Size) {
+bool Vectorizer::areConsecutivePointers(Value *PtrA, Value *PtrB,
+ APInt PtrDelta, unsigned Depth) const {
unsigned PtrBitWidth = DL.getPointerTypeSizeInBits(PtrA->getType());
APInt OffsetA(PtrBitWidth, 0);
APInt OffsetB(PtrBitWidth, 0);
@@ -316,11 +331,11 @@ bool Vectorizer::areConsecutivePointers(Value *PtrA, Value *PtrB, APInt Size) {
// Check if they are based on the same pointer. That makes the offsets
// sufficient.
if (PtrA == PtrB)
- return OffsetDelta == Size;
+ return OffsetDelta == PtrDelta;
// Compute the necessary base pointer delta to have the necessary final delta
- // equal to the size.
- APInt BaseDelta = Size - OffsetDelta;
+ // equal to the pointer delta requested.
+ APInt BaseDelta = PtrDelta - OffsetDelta;
// Compute the distance with SCEV between the base pointers.
const SCEV *PtrSCEVA = SE.getSCEV(PtrA);
@@ -341,15 +356,16 @@ bool Vectorizer::areConsecutivePointers(Value *PtrA, Value *PtrB, APInt Size) {
// Sometimes even this doesn't work, because SCEV can't always see through
// patterns that look like (gep (ext (add (shl X, C1), C2))). Try checking
// things the hard way.
- return lookThroughComplexAddresses(PtrA, PtrB, BaseDelta);
+ return lookThroughComplexAddresses(PtrA, PtrB, BaseDelta, Depth);
}
bool Vectorizer::lookThroughComplexAddresses(Value *PtrA, Value *PtrB,
- APInt PtrDelta) {
+ APInt PtrDelta,
+ unsigned Depth) const {
auto *GEPA = dyn_cast<GetElementPtrInst>(PtrA);
auto *GEPB = dyn_cast<GetElementPtrInst>(PtrB);
if (!GEPA || !GEPB)
- return false;
+ return lookThroughSelects(PtrA, PtrB, PtrDelta, Depth);
// Look through GEPs after checking they're the same except for the last
// index.
@@ -434,6 +450,23 @@ bool Vectorizer::lookThroughComplexAddresses(Value *PtrA, Value *PtrB,
return X == OffsetSCEVB;
}
+bool Vectorizer::lookThroughSelects(Value *PtrA, Value *PtrB, APInt PtrDelta,
+ unsigned Depth) const {
+ if (Depth++ == MaxDepth)
+ return false;
+
+ if (auto *SelectA = dyn_cast<SelectInst>(PtrA)) {
+ if (auto *SelectB = dyn_cast<SelectInst>(PtrB)) {
+ return SelectA->getCondition() == SelectB->getCondition() &&
+ areConsecutivePointers(SelectA->getTrueValue(),
+ SelectB->getTrueValue(), PtrDelta, Depth) &&
+ areConsecutivePointers(SelectA->getFalseValue(),
+ SelectB->getFalseValue(), PtrDelta, Depth);
+ }
+ }
+ return false;
+}
+
void Vectorizer::reorder(Instruction *I) {
OrderedBasicBlock OBB(I->getParent());
SmallPtrSet<Instruction *, 16> InstructionsToMove;
@@ -656,6 +689,20 @@ Vectorizer::getVectorizablePrefix(ArrayRef<Instruction *> Chain) {
return Chain.slice(0, ChainIdx);
}
+static ChainID getChainID(const Value *Ptr, const DataLayout &DL) {
+ const Value *ObjPtr = GetUnderlyingObject(Ptr, DL);
+ if (const auto *Sel = dyn_cast<SelectInst>(ObjPtr)) {
+ // The select's themselves are distinct instructions even if they share the
+ // same condition and evaluate to consecutive pointers for true and false
+ // values of the condition. Therefore using the select's themselves for
+ // grouping instructions would put consecutive accesses into different lists
+ // and they won't be even checked for being consecutive, and won't be
+ // vectorized.
+ return Sel->getCondition();
+ }
+ return ObjPtr;
+}
+
std::pair<InstrListMap, InstrListMap>
Vectorizer::collectInstructions(BasicBlock *BB) {
InstrListMap LoadRefs;
@@ -710,8 +757,8 @@ Vectorizer::collectInstructions(BasicBlock *BB) {
continue;
// Save the load locations.
- Value *ObjPtr = GetUnderlyingObject(Ptr, DL);
- LoadRefs[ObjPtr].push_back(LI);
+ const ChainID ID = getChainID(Ptr, DL);
+ LoadRefs[ID].push_back(LI);
} else if (StoreInst *SI = dyn_cast<StoreInst>(&I)) {
if (!SI->isSimple())
continue;
@@ -756,8 +803,8 @@ Vectorizer::collectInstructions(BasicBlock *BB) {
continue;
// Save store location.
- Value *ObjPtr = GetUnderlyingObject(Ptr, DL);
- StoreRefs[ObjPtr].push_back(SI);
+ const ChainID ID = getChainID(Ptr, DL);
+ StoreRefs[ID].push_back(SI);
}
}
@@ -767,7 +814,7 @@ Vectorizer::collectInstructions(BasicBlock *BB) {
bool Vectorizer::vectorizeChains(InstrListMap &Map) {
bool Changed = false;
- for (const std::pair<Value *, InstrList> &Chain : Map) {
+ for (const std::pair<ChainID, InstrList> &Chain : Map) {
unsigned Size = Chain.second.size();
if (Size < 2)
continue;
OpenPOWER on IntegriCloud