Diffstat (limited to 'llvm/lib')
-rw-r--r--   llvm/lib/Target/X86/X86ISelLowering.cpp       | 18
-rw-r--r--   llvm/lib/Target/X86/X86ISelLowering.h          |  3
-rw-r--r--   llvm/lib/Transforms/Scalar/CodeGenPrepare.cpp  | 72
3 files changed, 93 insertions, 0 deletions
diff --git a/llvm/lib/Target/X86/X86ISelLowering.cpp b/llvm/lib/Target/X86/X86ISelLowering.cpp
index b1d734e932b..f038580c5f9 100644
--- a/llvm/lib/Target/X86/X86ISelLowering.cpp
+++ b/llvm/lib/Target/X86/X86ISelLowering.cpp
@@ -14172,6 +14172,24 @@ bool X86TargetLowering::isLegalAddressingMode(const AddrMode &AM,
   return true;
 }
 
+bool X86TargetLowering::isVectorShiftByScalarCheap(Type *Ty) const {
+  unsigned Bits = Ty->getScalarSizeInBits();
+
+  // 8-bit shifts are always expensive, but versions with a scalar amount aren't
+  // particularly cheaper than those without.
+  if (Bits == 8)
+    return false;
+
+  // On AVX2 there are new vpsllv[dq] instructions (and other shifts), that make
+  // variable shifts just as cheap as scalar ones.
+  if (Subtarget->hasInt256() && (Bits == 32 || Bits == 64))
+    return false;
+
+  // Otherwise, it's significantly cheaper to shift by a scalar amount than by a
+  // fully general vector.
+  return true;
+}
+
 bool X86TargetLowering::isTruncateFree(Type *Ty1, Type *Ty2) const {
   if (!Ty1->isIntegerTy() || !Ty2->isIntegerTy())
     return false;
diff --git a/llvm/lib/Target/X86/X86ISelLowering.h b/llvm/lib/Target/X86/X86ISelLowering.h
index 6ea060ba3bc..ce9594ae3ed 100644
--- a/llvm/lib/Target/X86/X86ISelLowering.h
+++ b/llvm/lib/Target/X86/X86ISelLowering.h
@@ -679,6 +679,9 @@ namespace llvm {
     /// the immediate into a register.
     virtual bool isLegalAddImmediate(int64_t Imm) const;
 
+
+    virtual bool isVectorShiftByScalarCheap(Type *Ty) const;
+
     /// isTruncateFree - Return true if it's free to truncate a value of
     /// type Ty1 to type Ty2. e.g. On x86 it's free to truncate a i32 value in
     /// register EAX to i16 by referencing its sub-register AX.
diff --git a/llvm/lib/Transforms/Scalar/CodeGenPrepare.cpp b/llvm/lib/Transforms/Scalar/CodeGenPrepare.cpp
index 0fde256943d..3c9ecce8e3e 100644
--- a/llvm/lib/Transforms/Scalar/CodeGenPrepare.cpp
+++ b/llvm/lib/Transforms/Scalar/CodeGenPrepare.cpp
@@ -132,6 +132,7 @@ typedef DenseMap<Instruction *, Type *> InstrToOrigTy;
     bool MoveExtToFormExtLoad(Instruction *I);
     bool OptimizeExtUses(Instruction *I);
     bool OptimizeSelectInst(SelectInst *SI);
+    bool OptimizeShuffleVectorInst(ShuffleVectorInst *SI);
     bool DupRetToEnableTailCallOpts(BasicBlock *BB);
     bool PlaceDbgValues(Function &F);
   };
@@ -2719,6 +2720,74 @@ bool CodeGenPrepare::OptimizeSelectInst(SelectInst *SI) {
   return true;
 }
 
+
+bool isBroadcastShuffle(ShuffleVectorInst *SVI) {
+  SmallVector<int, 16> Mask(SVI->getShuffleMask());
+  int SplatElem = -1;
+  for (unsigned i = 0; i < Mask.size(); ++i) {
+    if (SplatElem != -1 && Mask[i] != -1 && Mask[i] != SplatElem)
+      return false;
+    SplatElem = Mask[i];
+  }
+
+  return true;
+}
+
+/// Some targets have expensive vector shifts if the lanes aren't all the same
+/// (e.g. x86 only introduced "vpsllvd" and friends with AVX2). In these cases
+/// it's often worth sinking a shufflevector splat down to its use so that
+/// codegen can spot all lanes are identical.
+bool CodeGenPrepare::OptimizeShuffleVectorInst(ShuffleVectorInst *SVI) {
+  BasicBlock *DefBB = SVI->getParent();
+
+  // Only do this xform if variable vector shifts are particularly expensive.
+  if (!TLI || !TLI->isVectorShiftByScalarCheap(SVI->getType()))
+    return false;
+
+  // We only expect better codegen by sinking a shuffle if we can recognise a
+  // constant splat.
+  if (!isBroadcastShuffle(SVI))
+    return false;
+
+  // InsertedShuffles - Only insert a shuffle in each block once.
+  DenseMap<BasicBlock*, Instruction*> InsertedShuffles;
+
+  bool MadeChange = false;
+  for (Value::use_iterator UI = SVI->use_begin(), E = SVI->use_end();
+       UI != E; ++UI) {
+    Instruction *User = cast<Instruction>(*UI);
+
+    // Figure out which BB this ext is used in.
+    BasicBlock *UserBB = User->getParent();
+    if (UserBB == DefBB) continue;
+
+    // For now only apply this when the splat is used by a shift instruction.
+    if (!User->isShift()) continue;
+
+    // Everything checks out, sink the shuffle if the user's block doesn't
+    // already have a copy.
+    Instruction *&InsertedShuffle = InsertedShuffles[UserBB];
+
+    if (!InsertedShuffle) {
+      BasicBlock::iterator InsertPt = UserBB->getFirstInsertionPt();
+      InsertedShuffle = new ShuffleVectorInst(SVI->getOperand(0),
+                                              SVI->getOperand(1),
+                                              SVI->getOperand(2), "", InsertPt);
+    }
+
+    User->replaceUsesOfWith(SVI, InsertedShuffle);
+    MadeChange = true;
+  }
+
+  // If we removed all uses, nuke the shuffle.
+  if (SVI->use_empty()) {
+    SVI->eraseFromParent();
+    MadeChange = true;
+  }
+
+  return MadeChange;
+}
+
 bool CodeGenPrepare::OptimizeInst(Instruction *I) {
   if (PHINode *P = dyn_cast<PHINode>(I)) {
     // It is possible for very late stage optimizations (such as SimplifyCFG)
@@ -2791,6 +2860,9 @@ bool CodeGenPrepare::OptimizeInst(Instruction *I) {
   if (SelectInst *SI = dyn_cast<SelectInst>(I))
     return OptimizeSelectInst(SI);
 
+  if (ShuffleVectorInst *SVI = dyn_cast<ShuffleVectorInst>(I))
+    return OptimizeShuffleVectorInst(SVI);
+
   return false;
 }
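
The effect of the new CodeGenPrepare transform is easiest to see on IR. The following is a minimal illustrative sketch, not taken from the commit or its tests; the function and value names are hypothetical. The splat shuffle that feeds a shift in another block is re-created next to the shift, so instruction selection in that block can tell that every lane of the shift amount is identical.

; Before CodeGenPrepare, on a pre-AVX2 x86 target (so isVectorShiftByScalarCheap
; returns true for <4 x i32> and the transform fires):
define <4 x i32> @example(<4 x i32> %val, i32 %amt, i1 %cond) {
entry:
  %ins = insertelement <4 x i32> undef, i32 %amt, i32 0
  %splat = shufflevector <4 x i32> %ins, <4 x i32> undef, <4 x i32> zeroinitializer
  br i1 %cond, label %shift, label %exit

shift:
  %shl = shl <4 x i32> %val, %splat
  ret <4 x i32> %shl

exit:
  ret <4 x i32> %val
}

; After OptimizeShuffleVectorInst, the shuffle is cloned at the first insertion
; point of %shift, the shift is rewritten to use the clone, and the now-dead
; original shuffle in %entry is erased:
shift:
  %splat1 = shufflevector <4 x i32> %ins, <4 x i32> undef, <4 x i32> zeroinitializer
  %shl = shl <4 x i32> %val, %splat1
  ret <4 x i32> %shl

With the splat visible in the same block as its user, the backend can select a shift-by-scalar form (e.g. pslld with the count in an XMM register) instead of expanding a fully general per-lane variable shift.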