summary refs log tree commit diff stats
path: root/llvm/lib/Transforms/Vectorize/SLPVectorizer.cpp
diff options
context:
space:
mode:
Diffstat (limited to 'llvm/lib/Transforms/Vectorize/SLPVectorizer.cpp')
-rw-r--r-- llvm/lib/Transforms/Vectorize/SLPVectorizer.cpp 108
1 files changed, 55 insertions, 53 deletions
diff --git a/llvm/lib/Transforms/Vectorize/SLPVectorizer.cpp b/llvm/lib/Transforms/Vectorize/SLPVectorizer.cpp
index 4168428c662..eefb50a5037 100644
--- a/llvm/lib/Transforms/Vectorize/SLPVectorizer.cpp
+++ b/llvm/lib/Transforms/Vectorize/SLPVectorizer.cpp
@@ -345,27 +345,23 @@ struct InstructionsState {
/// Chooses the correct key for scheduling data. If \p Op has the same (or
/// alternate) opcode as \p OpValue, the key is \p Op. Otherwise the key is \p
/// OpValue.
-static Value *isOneOf(Value *OpValue, Value *Op) {
+static Value *isOneOf(const InstructionsState &S, Value *Op) {
auto *I = dyn_cast<Instruction>(Op);
- if (!I)
- return OpValue;
- auto *OpInst = cast<Instruction>(OpValue);
- unsigned OpInstOpcode = OpInst->getOpcode();
- unsigned IOpcode = I->getOpcode();
- if (sameOpcodeOrAlt(OpInstOpcode, getAltOpcode(OpInstOpcode), IOpcode))
+ if (I && sameOpcodeOrAlt(S.Opcode, S.AltOpcode, I->getOpcode()))
return Op;
- return OpValue;
+ return S.OpValue;
}
/// \returns analysis of the Instructions in \p VL described in
/// InstructionsState, the Opcode that we suppose the whole list
/// could be vectorized even if its structure is diverse.
-static InstructionsState getSameOpcode(ArrayRef<Value *> VL) {
+static InstructionsState getSameOpcode(ArrayRef<Value *> VL,
+ unsigned BaseIndex = 0) {
// Make sure these are all Instructions.
if (llvm::any_of(VL, [](Value *V) { return !isa<Instruction>(V); }))
- return InstructionsState(VL[0], 0, 0);
+ return InstructionsState(VL[BaseIndex], 0, 0);
- unsigned Opcode = cast<Instruction>(VL[0])->getOpcode();
+ unsigned Opcode = cast<Instruction>(VL[BaseIndex])->getOpcode();
unsigned AltOpcode = Opcode;
bool HasAltOpcodes = llvm::any_of(VL, [Opcode](Value *V) {
return Opcode != cast<Instruction>(V)->getOpcode();
@@ -377,11 +373,11 @@ static InstructionsState getSameOpcode(ArrayRef<Value *> VL) {
for (int Cnt = 0, E = VL.size(); Cnt < E; Cnt++) {
unsigned InstOpcode = cast<Instruction>(VL[Cnt])->getOpcode();
if (!sameOpcodeOrAlt(Opcode, AltOpcode, InstOpcode))
- return InstructionsState(VL[0], 0, 0);
+ return InstructionsState(VL[BaseIndex], 0, 0);
}
}
- return InstructionsState(VL[0], Opcode, AltOpcode);
+ return InstructionsState(VL[BaseIndex], Opcode, AltOpcode);
}
/// \returns true if all of the values in \p VL have the same type or false
@@ -632,7 +628,8 @@ private:
/// Set the Builder insert point to one after the last instruction in
/// the bundle
- void setInsertPointAfterBundle(ArrayRef<Value *> VL, Value *OpValue);
+ void setInsertPointAfterBundle(ArrayRef<Value *> VL,
+ const InstructionsState &S);
/// \returns a vector from a collection of scalars in \p VL.
Value *Gather(ArrayRef<Value *> VL, VectorType *Ty);
@@ -1077,7 +1074,8 @@ private:
/// Checks if a bundle of instructions can be scheduled, i.e. has no
/// cyclic dependencies. This is only a dry-run, no instructions are
/// actually moved at this stage.
- bool tryScheduleBundle(ArrayRef<Value *> VL, BoUpSLP *SLP, Value *OpValue);
+ bool tryScheduleBundle(ArrayRef<Value *> VL, BoUpSLP *SLP,
+ const InstructionsState &S);
/// Un-bundles a group of instructions.
void cancelScheduling(ArrayRef<Value *> VL, Value *OpValue);
@@ -1087,7 +1085,7 @@ private:
/// Extends the scheduling region so that V is inside the region.
/// \returns true if the region size is within the limit.
- bool extendSchedulingRegion(Value *V, Value *OpValue);
+ bool extendSchedulingRegion(Value *V, const InstructionsState &S);
/// Initialize the ScheduleData structures for new instructions in the
/// scheduling region.
@@ -1507,7 +1505,7 @@ void BoUpSLP::buildTree_rec(ArrayRef<Value *> VL, unsigned Depth,
BlockScheduling &BS = *BSRef.get();
- if (!BS.tryScheduleBundle(VL, this, VL0)) {
+ if (!BS.tryScheduleBundle(VL, this, S)) {
LLVM_DEBUG(dbgs() << "SLP: We are not able to schedule this bundle!\n");
assert((!BS.getScheduleData(VL0) ||
!BS.getScheduleData(VL0)->isPartOfBundle()) &&
@@ -2851,13 +2849,14 @@ void BoUpSLP::reorderInputsAccordingToOpcode(unsigned Opcode,
}
}
-void BoUpSLP::setInsertPointAfterBundle(ArrayRef<Value *> VL, Value *OpValue) {
+void BoUpSLP::setInsertPointAfterBundle(ArrayRef<Value *> VL,
+ const InstructionsState &S) {
// Get the basic block this bundle is in. All instructions in the bundle
// should be in this block.
- auto *Front = cast<Instruction>(OpValue);
+ auto *Front = cast<Instruction>(S.OpValue);
auto *BB = Front->getParent();
- const unsigned Opcode = cast<Instruction>(OpValue)->getOpcode();
- const unsigned AltOpcode = getAltOpcode(Opcode);
+ const unsigned Opcode = S.Opcode;
+ const unsigned AltOpcode = S.AltOpcode;
assert(llvm::all_of(make_range(VL.begin(), VL.end()), [=](Value *V) -> bool {
return !sameOpcodeOrAlt(Opcode, AltOpcode,
cast<Instruction>(V)->getOpcode()) ||
@@ -2873,7 +2872,7 @@ void BoUpSLP::setInsertPointAfterBundle(ArrayRef<Value *> VL, Value *OpValue) {
// bundle. The end of the bundle is marked by null ScheduleData.
if (BlocksSchedules.count(BB)) {
auto *Bundle =
- BlocksSchedules[BB]->getScheduleData(isOneOf(OpValue, VL.back()));
+ BlocksSchedules[BB]->getScheduleData(isOneOf(S, VL.back()));
if (Bundle && Bundle->isPartOfBundle())
for (; Bundle; Bundle = Bundle->NextInBundle)
if (Bundle->OpValue == Bundle->Inst)
@@ -3029,7 +3028,7 @@ Value *BoUpSLP::vectorizeTree(TreeEntry *E) {
}
InstructionsState S = getSameOpcode(E->Scalars);
- Instruction *VL0 = cast<Instruction>(E->Scalars[0]);
+ Instruction *VL0 = cast<Instruction>(S.OpValue);
Type *ScalarTy = VL0->getType();
if (StoreInst *SI = dyn_cast<StoreInst>(VL0))
ScalarTy = SI->getValueOperand()->getType();
@@ -3038,7 +3037,7 @@ Value *BoUpSLP::vectorizeTree(TreeEntry *E) {
bool NeedToShuffleReuses = !E->ReuseShuffleIndices.empty();
if (E->NeedToGather) {
- setInsertPointAfterBundle(E->Scalars, VL0);
+ setInsertPointAfterBundle(E->Scalars, S);
auto *V = Gather(E->Scalars, VecTy);
if (NeedToShuffleReuses) {
V = Builder.CreateShuffleVector(V, UndefValue::get(VecTy),
@@ -3115,7 +3114,7 @@ Value *BoUpSLP::vectorizeTree(TreeEntry *E) {
E->VectorizedValue = V;
return V;
}
- setInsertPointAfterBundle(E->Scalars, VL0);
+ setInsertPointAfterBundle(E->Scalars, S);
auto *V = Gather(E->Scalars, VecTy);
if (NeedToShuffleReuses) {
V = Builder.CreateShuffleVector(V, UndefValue::get(VecTy),
@@ -3150,7 +3149,7 @@ Value *BoUpSLP::vectorizeTree(TreeEntry *E) {
E->VectorizedValue = NewV;
return NewV;
}
- setInsertPointAfterBundle(E->Scalars, VL0);
+ setInsertPointAfterBundle(E->Scalars, S);
auto *V = Gather(E->Scalars, VecTy);
if (NeedToShuffleReuses) {
V = Builder.CreateShuffleVector(V, UndefValue::get(VecTy),
@@ -3179,7 +3178,7 @@ Value *BoUpSLP::vectorizeTree(TreeEntry *E) {
for (Value *V : E->Scalars)
INVL.push_back(cast<Instruction>(V)->getOperand(0));
- setInsertPointAfterBundle(E->Scalars, VL0);
+ setInsertPointAfterBundle(E->Scalars, S);
Value *InVec = vectorizeTree(INVL);
@@ -3206,7 +3205,7 @@ Value *BoUpSLP::vectorizeTree(TreeEntry *E) {
RHSV.push_back(cast<Instruction>(V)->getOperand(1));
}
- setInsertPointAfterBundle(E->Scalars, VL0);
+ setInsertPointAfterBundle(E->Scalars, S);
Value *L = vectorizeTree(LHSV);
Value *R = vectorizeTree(RHSV);
@@ -3240,7 +3239,7 @@ Value *BoUpSLP::vectorizeTree(TreeEntry *E) {
FalseVec.push_back(cast<Instruction>(V)->getOperand(2));
}
- setInsertPointAfterBundle(E->Scalars, VL0);
+ setInsertPointAfterBundle(E->Scalars, S);
Value *Cond = vectorizeTree(CondVec);
Value *True = vectorizeTree(TrueVec);
@@ -3289,7 +3288,7 @@ Value *BoUpSLP::vectorizeTree(TreeEntry *E) {
RHSVL.push_back(I->getOperand(1));
}
- setInsertPointAfterBundle(E->Scalars, VL0);
+ setInsertPointAfterBundle(E->Scalars, S);
Value *LHS = vectorizeTree(LHSVL);
Value *RHS = vectorizeTree(RHSVL);
@@ -3318,9 +3317,11 @@ Value *BoUpSLP::vectorizeTree(TreeEntry *E) {
// Loads are inserted at the head of the tree because we don't want to
// sink them all the way down past store instructions.
bool IsReorder = !E->ReorderIndices.empty();
- if (IsReorder)
- VL0 = cast<Instruction>(E->Scalars[E->ReorderIndices.front()]);
- setInsertPointAfterBundle(E->Scalars, VL0);
+ if (IsReorder) {
+ S = getSameOpcode(E->Scalars, E->ReorderIndices.front());
+ VL0 = cast<Instruction>(S.OpValue);
+ }
+ setInsertPointAfterBundle(E->Scalars, S);
LoadInst *LI = cast<LoadInst>(VL0);
Type *ScalarLoadTy = LI->getType();
@@ -3367,12 +3368,12 @@ Value *BoUpSLP::vectorizeTree(TreeEntry *E) {
for (Value *V : E->Scalars)
ScalarStoreValues.push_back(cast<StoreInst>(V)->getValueOperand());
- setInsertPointAfterBundle(E->Scalars, VL0);
+ setInsertPointAfterBundle(E->Scalars, S);
Value *VecValue = vectorizeTree(ScalarStoreValues);
Value *ScalarPtr = SI->getPointerOperand();
Value *VecPtr = Builder.CreateBitCast(ScalarPtr, VecTy->getPointerTo(AS));
- StoreInst *S = Builder.CreateStore(VecValue, VecPtr);
+ StoreInst *ST = Builder.CreateStore(VecValue, VecPtr);
// The pointer operand uses an in-tree scalar, so add the new BitCast to
// ExternalUses to make sure that an extract will be generated in the
@@ -3383,8 +3384,8 @@ Value *BoUpSLP::vectorizeTree(TreeEntry *E) {
if (!Alignment)
Alignment = DL->getABITypeAlignment(SI->getValueOperand()->getType());
- S->setAlignment(Alignment);
- Value *V = propagateMetadata(S, E->Scalars);
+ ST->setAlignment(Alignment);
+ Value *V = propagateMetadata(ST, E->Scalars);
if (NeedToShuffleReuses) {
V = Builder.CreateShuffleVector(V, UndefValue::get(VecTy),
E->ReuseShuffleIndices, "shuffle");
@@ -3394,7 +3395,7 @@ Value *BoUpSLP::vectorizeTree(TreeEntry *E) {
return V;
}
case Instruction::GetElementPtr: {
- setInsertPointAfterBundle(E->Scalars, VL0);
+ setInsertPointAfterBundle(E->Scalars, S);
ValueList Op0VL;
for (Value *V : E->Scalars)
@@ -3429,7 +3430,7 @@ Value *BoUpSLP::vectorizeTree(TreeEntry *E) {
}
case Instruction::Call: {
CallInst *CI = cast<CallInst>(VL0);
- setInsertPointAfterBundle(E->Scalars, VL0);
+ setInsertPointAfterBundle(E->Scalars, S);
Function *FI;
Intrinsic::ID IID = Intrinsic::not_intrinsic;
Value *ScalarArg = nullptr;
@@ -3486,7 +3487,7 @@ Value *BoUpSLP::vectorizeTree(TreeEntry *E) {
"Invalid Shuffle Vector Operand");
reorderAltShuffleOperands(S.Opcode, S.AltOpcode, E->Scalars, LHSVL,
RHSVL);
- setInsertPointAfterBundle(E->Scalars, VL0);
+ setInsertPointAfterBundle(E->Scalars, S);
Value *LHS = vectorizeTree(LHSVL);
Value *RHS = vectorizeTree(RHSVL);
@@ -3789,8 +3790,9 @@ void BoUpSLP::optimizeGatherSequence() {
// Groups the instructions to a bundle (which is then a single scheduling entity)
// and schedules instructions until the bundle gets ready.
bool BoUpSLP::BlockScheduling::tryScheduleBundle(ArrayRef<Value *> VL,
- BoUpSLP *SLP, Value *OpValue) {
- if (isa<PHINode>(OpValue))
+ BoUpSLP *SLP,
+ const InstructionsState &S) {
+ if (isa<PHINode>(S.OpValue))
return true;
// Initialize the instruction bundle.
@@ -3798,12 +3800,12 @@ bool BoUpSLP::BlockScheduling::tryScheduleBundle(ArrayRef<Value *> VL,
ScheduleData *PrevInBundle = nullptr;
ScheduleData *Bundle = nullptr;
bool ReSchedule = false;
- LLVM_DEBUG(dbgs() << "SLP: bundle: " << *OpValue << "\n");
+ LLVM_DEBUG(dbgs() << "SLP: bundle: " << *S.OpValue << "\n");
// Make sure that the scheduling region contains all
// instructions of the bundle.
for (Value *V : VL) {
- if (!extendSchedulingRegion(V, OpValue))
+ if (!extendSchedulingRegion(V, S))
return false;
}
@@ -3870,7 +3872,7 @@ bool BoUpSLP::BlockScheduling::tryScheduleBundle(ArrayRef<Value *> VL,
}
}
if (!Bundle->isReady()) {
- cancelScheduling(VL, OpValue);
+ cancelScheduling(VL, S.OpValue);
return false;
}
return true;
@@ -3913,13 +3915,13 @@ BoUpSLP::ScheduleData *BoUpSLP::BlockScheduling::allocateScheduleDataChunks() {
}
bool BoUpSLP::BlockScheduling::extendSchedulingRegion(Value *V,
- Value *OpValue) {
- if (getScheduleData(V, isOneOf(OpValue, V)))
+ const InstructionsState &S) {
+ if (getScheduleData(V, isOneOf(S, V)))
return true;
Instruction *I = dyn_cast<Instruction>(V);
assert(I && "bundle member must be an instruction");
assert(!isa<PHINode>(I) && "phi nodes don't need to be scheduled");
- auto &&CheckSheduleForI = [this, OpValue](Instruction *I) -> bool {
+ auto &&CheckSheduleForI = [this, &S](Instruction *I) -> bool {
ScheduleData *ISD = getScheduleData(I);
if (!ISD)
return false;
@@ -3927,8 +3929,8 @@ bool BoUpSLP::BlockScheduling::extendSchedulingRegion(Value *V,
"ScheduleData not in scheduling region");
ScheduleData *SD = allocateScheduleDataChunks();
SD->Inst = I;
- SD->init(SchedulingRegionID, OpValue);
- ExtraScheduleDataMap[I][OpValue] = SD;
+ SD->init(SchedulingRegionID, S.OpValue);
+ ExtraScheduleDataMap[I][S.OpValue] = SD;
return true;
};
if (CheckSheduleForI(I))
@@ -3938,7 +3940,7 @@ bool BoUpSLP::BlockScheduling::extendSchedulingRegion(Value *V,
initScheduleData(I, I->getNextNode(), nullptr, nullptr);
ScheduleStart = I;
ScheduleEnd = I->getNextNode();
- if (isOneOf(OpValue, I) != I)
+ if (isOneOf(S, I) != I)
CheckSheduleForI(I);
assert(ScheduleEnd && "tried to vectorize a TerminatorInst?");
LLVM_DEBUG(dbgs() << "SLP: initialize schedule region to " << *I << "\n");
@@ -3961,7 +3963,7 @@ bool BoUpSLP::BlockScheduling::extendSchedulingRegion(Value *V,
if (&*UpIter == I) {
initScheduleData(I, ScheduleStart, nullptr, FirstLoadStoreInRegion);
ScheduleStart = I;
- if (isOneOf(OpValue, I) != I)
+ if (isOneOf(S, I) != I)
CheckSheduleForI(I);
LLVM_DEBUG(dbgs() << "SLP: extend schedule region start to " << *I
<< "\n");
@@ -3974,7 +3976,7 @@ bool BoUpSLP::BlockScheduling::extendSchedulingRegion(Value *V,
initScheduleData(ScheduleEnd, I->getNextNode(), LastLoadStoreInRegion,
nullptr);
ScheduleEnd = I->getNextNode();
- if (isOneOf(OpValue, I) != I)
+ if (isOneOf(S, I) != I)
CheckSheduleForI(I);
assert(ScheduleEnd && "tried to vectorize a TerminatorInst?");
LLVM_DEBUG(dbgs() << "SLP: extend schedule region end to " << *I
OpenPOWER on IntegriCloud