diff options
Diffstat (limited to 'llvm/lib/Transforms/Vectorize/SLPVectorizer.cpp')
-rw-r--r-- | llvm/lib/Transforms/Vectorize/SLPVectorizer.cpp | 165 |
1 files changed, 151 insertions, 14 deletions
diff --git a/llvm/lib/Transforms/Vectorize/SLPVectorizer.cpp b/llvm/lib/Transforms/Vectorize/SLPVectorizer.cpp index f18202be167..53a43d9851e 100644 --- a/llvm/lib/Transforms/Vectorize/SLPVectorizer.cpp +++ b/llvm/lib/Transforms/Vectorize/SLPVectorizer.cpp @@ -149,6 +149,48 @@ static bool isSplat(ArrayRef<Value *> VL) { return true; } +///\returns Opcode that can be clubbed with \p Op to create an alternate +/// sequence which can later be merged as a ShuffleVector instruction. +static unsigned getAltOpcode(unsigned Op) { + switch (Op) { + case Instruction::FAdd: + return Instruction::FSub; + case Instruction::FSub: + return Instruction::FAdd; + case Instruction::Add: + return Instruction::Sub; + case Instruction::Sub: + return Instruction::Add; + default: + return 0; + } +} + +///\returns bool representing if Opcode \p Op can be part +/// of an alternate sequence which can later be merged as +/// a ShuffleVector instruction. +static bool canCombineAsAltInst(unsigned Op) { + if (Op == Instruction::FAdd || Op == Instruction::FSub || + Op == Instruction::Sub || Op == Instruction::Add) + return true; + return false; +} + +/// \returns ShuffleVector instruction if intructions in \p VL have +/// alternate fadd,fsub / fsub,fadd/add,sub/sub,add sequence. +/// (i.e. e.g. opcodes of fadd,fsub,fadd,fsub...) +static unsigned isAltInst(ArrayRef<Value *> VL) { + Instruction *I0 = dyn_cast<Instruction>(VL[0]); + unsigned Opcode = I0->getOpcode(); + unsigned AltOpcode = getAltOpcode(Opcode); + for (int i = 1, e = VL.size(); i < e; i++) { + Instruction *I = dyn_cast<Instruction>(VL[i]); + if (!I || I->getOpcode() != ((i & 1) ? AltOpcode : Opcode)) + return 0; + } + return Instruction::ShuffleVector; +} + /// \returns The opcode if all of the Instructions in \p VL have the same /// opcode, or zero. static unsigned getSameOpcode(ArrayRef<Value *> VL) { @@ -158,8 +200,11 @@ static unsigned getSameOpcode(ArrayRef<Value *> VL) { unsigned Opcode = I0->getOpcode(); for (int i = 1, e = VL.size(); i < e; i++) { Instruction *I = dyn_cast<Instruction>(VL[i]); - if (!I || Opcode != I->getOpcode()) + if (!I || Opcode != I->getOpcode()) { + if (canCombineAsAltInst(Opcode) && i == 1) + return isAltInst(VL); return 0; + } } return Opcode; } @@ -377,6 +422,7 @@ public: /// \brief Perform LICM and CSE on the newly generated gather sequences. void optimizeGatherSequence(); + private: struct TreeEntry; @@ -594,6 +640,7 @@ void BoUpSLP::buildTree(ArrayRef<Value *> Roots, void BoUpSLP::buildTree_rec(ArrayRef<Value *> VL, unsigned Depth) { bool SameTy = getSameType(VL); (void)SameTy; + bool isAltShuffle = false; assert(SameTy && "Invalid types!"); if (Depth == RecursionMaxDepth) { @@ -615,10 +662,19 @@ void BoUpSLP::buildTree_rec(ArrayRef<Value *> VL, unsigned Depth) { newTreeEntry(VL, false); return; } + unsigned Opcode = getSameOpcode(VL); + + // Check that this shuffle vector refers to the alternate + // sequence of opcodes. + if (Opcode == Instruction::ShuffleVector) { + Instruction *I0 = dyn_cast<Instruction>(VL[0]); + unsigned Op = I0->getOpcode(); + if (Op != Instruction::ShuffleVector) + isAltShuffle = true; + } // If all of the operands are identical or constant we have a simple solution. - if (allConstant(VL) || isSplat(VL) || !getSameBlock(VL) || - !getSameOpcode(VL)) { + if (allConstant(VL) || isSplat(VL) || !getSameBlock(VL) || !Opcode) { DEBUG(dbgs() << "SLP: Gathering due to C,S,B,O. \n"); newTreeEntry(VL, false); return; @@ -754,8 +810,6 @@ void BoUpSLP::buildTree_rec(ArrayRef<Value *> VL, unsigned Depth) { DEBUG(dbgs() << "SLP: We are able to schedule this bundle.\n"); - unsigned Opcode = getSameOpcode(VL); - // Check if it is safe to sink the loads or the stores. if (Opcode == Instruction::Load || Opcode == Instruction::Store) { Instruction *Last = getLastInstruction(VL); @@ -1057,6 +1111,26 @@ void BoUpSLP::buildTree_rec(ArrayRef<Value *> VL, unsigned Depth) { } return; } + case Instruction::ShuffleVector: { + // If this is not an alternate sequence of opcode like add-sub + // then do not vectorize this instruction. + if (!isAltShuffle) { + newTreeEntry(VL, false); + DEBUG(dbgs() << "SLP: ShuffleVector are not vectorized.\n"); + return; + } + newTreeEntry(VL, true); + DEBUG(dbgs() << "SLP: added a ShuffleVector op.\n"); + for (unsigned i = 0, e = VL0->getNumOperands(); i < e; ++i) { + ValueList Operands; + // Prepare the operand vector. + for (unsigned j = 0; j < VL.size(); ++j) + Operands.push_back(cast<Instruction>(VL[j])->getOperand(i)); + + buildTree_rec(Operands, Depth + 1); + } + return; + } default: newTreeEntry(VL, false); DEBUG(dbgs() << "SLP: Gathering unknown instruction.\n"); @@ -1080,11 +1154,9 @@ int BoUpSLP::getEntryCost(TreeEntry *E) { } return getGatherCost(E->Scalars); } - - assert(getSameOpcode(VL) && getSameType(VL) && getSameBlock(VL) && - "Invalid VL"); + unsigned Opcode = getSameOpcode(VL); + assert(Opcode && getSameType(VL) && getSameBlock(VL) && "Invalid VL"); Instruction *VL0 = cast<Instruction>(VL[0]); - unsigned Opcode = VL0->getOpcode(); switch (Opcode) { case Instruction::PHI: { return 0; @@ -1242,6 +1314,32 @@ int BoUpSLP::getEntryCost(TreeEntry *E) { return VecCallCost - ScalarCallCost; } + case Instruction::ShuffleVector: { + TargetTransformInfo::OperandValueKind Op1VK = + TargetTransformInfo::OK_AnyValue; + TargetTransformInfo::OperandValueKind Op2VK = + TargetTransformInfo::OK_AnyValue; + int ScalarCost = 0; + int VecCost = 0; + for (unsigned i = 0; i < VL.size(); ++i) { + Instruction *I = cast<Instruction>(VL[i]); + if (!I) + break; + ScalarCost += + TTI->getArithmeticInstrCost(I->getOpcode(), ScalarTy, Op1VK, Op2VK); + } + // VecCost is equal to sum of the cost of creating 2 vectors + // and the cost of creating shuffle. + Instruction *I0 = cast<Instruction>(VL[0]); + VecCost = + TTI->getArithmeticInstrCost(I0->getOpcode(), VecTy, Op1VK, Op2VK); + Instruction *I1 = cast<Instruction>(VL[1]); + VecCost += + TTI->getArithmeticInstrCost(I1->getOpcode(), VecTy, Op1VK, Op2VK); + VecCost += + TTI->getShuffleCost(TargetTransformInfo::SK_Alternate, VecTy, 0); + return VecCost - ScalarCost; + } default: llvm_unreachable("Unknown instruction"); } @@ -1522,9 +1620,7 @@ Value *BoUpSLP::vectorizeTree(TreeEntry *E) { setInsertPointAfterBundle(E->Scalars); return Gather(E->Scalars, VecTy); } - - unsigned Opcode = VL0->getOpcode(); - assert(Opcode == getSameOpcode(E->Scalars) && "Invalid opcode"); + unsigned Opcode = getSameOpcode(E->Scalars); switch (Opcode) { case Instruction::PHI: { @@ -1797,6 +1893,49 @@ Value *BoUpSLP::vectorizeTree(TreeEntry *E) { E->VectorizedValue = V; return V; } + case Instruction::ShuffleVector: { + ValueList LHSVL, RHSVL; + for (int i = 0, e = E->Scalars.size(); i < e; ++i) { + LHSVL.push_back(cast<Instruction>(E->Scalars[i])->getOperand(0)); + RHSVL.push_back(cast<Instruction>(E->Scalars[i])->getOperand(1)); + } + setInsertPointAfterBundle(E->Scalars); + + Value *LHS = vectorizeTree(LHSVL); + Value *RHS = vectorizeTree(RHSVL); + + if (Value *V = alreadyVectorized(E->Scalars)) + return V; + + // Create a vector of LHS op1 RHS + BinaryOperator *BinOp0 = cast<BinaryOperator>(VL0); + Value *V0 = Builder.CreateBinOp(BinOp0->getOpcode(), LHS, RHS); + + // Create a vector of LHS op2 RHS + Instruction *VL1 = cast<Instruction>(E->Scalars[1]); + BinaryOperator *BinOp1 = cast<BinaryOperator>(VL1); + Value *V1 = Builder.CreateBinOp(BinOp1->getOpcode(), LHS, RHS); + + // Create appropriate shuffle to take alternative operations from + // the vector. + std::vector<Constant *> Mask(E->Scalars.size()); + unsigned e = E->Scalars.size(); + for (unsigned i = 0; i < e; ++i) { + if (i & 1) + Mask[i] = Builder.getInt32(e + i); + else + Mask[i] = Builder.getInt32(i); + } + + Value *ShuffleMask = ConstantVector::get(Mask); + + Value *V = Builder.CreateShuffleVector(V0, V1, ShuffleMask); + E->VectorizedValue = V; + if (Instruction *I = dyn_cast<Instruction>(V)) + return propagateMetadata(I, E->Scalars); + + return V; + } default: llvm_unreachable("unknown inst"); } @@ -1865,7 +2004,6 @@ Value *BoUpSLP::vectorizeTree() { // For each lane: for (int Lane = 0, LE = Entry->Scalars.size(); Lane != LE; ++Lane) { Value *Scalar = Entry->Scalars[Lane]; - // No need to handle users of gathered values. if (Entry->NeedToGather) continue; @@ -2049,7 +2187,6 @@ struct SLPVectorizer : public FunctionPass { for (po_iterator<BasicBlock*> it = po_begin(&F.getEntryBlock()), e = po_end(&F.getEntryBlock()); it != e; ++it) { BasicBlock *BB = *it; - // Vectorize trees that end at stores. if (unsigned count = collectStores(BB, R)) { (void)count; |