| author | Simon Pilgrim <llvm-dev@redking.me.uk> | 2016-02-24 15:14:21 +0000 |
|---|---|---|
| committer | Simon Pilgrim <llvm-dev@redking.me.uk> | 2016-02-24 15:14:21 +0000 |
| commit | 3b6feeaa7c88598657a8d74fe9f7aeafecf80756 (patch) | |
| tree | 880ea1b785562e4ef34b6b2c5da4cfa9b9a44f3c /llvm/lib | |
| parent | ef10cd7f4674d4bf00a788d77173f7bedaf63630 (diff) | |
| download | bcm5719-llvm-3b6feeaa7c88598657a8d74fe9f7aeafecf80756.tar.gz bcm5719-llvm-3b6feeaa7c88598657a8d74fe9f7aeafecf80756.zip | |
[X86][SSE41] Combine vector blends with zero
Part 2 of 2
This patch adds support for combining target shuffles into blends-with-zero.
Differential Revision: http://reviews.llvm.org/D17483
llvm-svn: 261745
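For context, "blend with zero" means selecting each lane either from the source vector or from an all-zero register in a single SSE4.1/AVX blend instruction, rather than materializing the zeroing through a more expensive shuffle sequence. The snippet below is an illustrative sketch only (not part of the patch), written with SSE4.1 intrinsics; the function name and the example immediate are made up for demonstration.

```cpp
// Illustrative only: zeroing lanes 1 and 3 of a 4 x float vector with a
// single SSE4.1 blend against a zeroed register - the kind of instruction
// this combine aims to reach from a target shuffle mask.
// Compile with SSE4.1 enabled (e.g. -msse4.1).
#include <smmintrin.h>

__m128 zero_odd_lanes(__m128 v) {
  const __m128 zero = _mm_setzero_ps();
  // Immediate bit i == 1 selects lane i from the second operand (zero).
  // 0b1010 keeps lanes 0 and 2 of v and zeroes lanes 1 and 3.
  return _mm_blend_ps(v, zero, 0b1010);
}
```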
Diffstat (limited to 'llvm/lib')
| -rw-r--r-- | llvm/lib/Target/X86/X86ISelLowering.cpp | 58 |
1 file changed, 58 insertions, 0 deletions
diff --git a/llvm/lib/Target/X86/X86ISelLowering.cpp b/llvm/lib/Target/X86/X86ISelLowering.cpp
index c7b4fa5ec6a..3ffd6666f55 100644
--- a/llvm/lib/Target/X86/X86ISelLowering.cpp
+++ b/llvm/lib/Target/X86/X86ISelLowering.cpp
@@ -4332,6 +4332,17 @@ static bool isSequentialOrUndefInRange(ArrayRef<int> Mask,
   return true;
 }
 
+/// Return true if every element in Mask, beginning
+/// from position Pos and ending in Pos+Size, falls within the specified
+/// sequential range (Low, Low+Size], or is undef or is zero.
+static bool isSequentialOrUndefOrZeroInRange(ArrayRef<int> Mask, unsigned Pos,
+                                             unsigned Size, int Low) {
+  for (unsigned i = Pos, e = Pos + Size; i != e; ++i, ++Low)
+    if (!isUndefOrZero(Mask[i]) && Mask[i] != Low)
+      return false;
+  return true;
+}
+
 /// Return true if the specified EXTRACT_SUBVECTOR operand specifies a vector
 /// extract that is suitable for instruction that extract 128 or 256 bit vectors
 static bool isVEXTRACTIndex(SDNode *N, unsigned vecWidth) {
@@ -23666,6 +23677,53 @@ static bool combineX86ShuffleChain(SDValue Input, SDValue Root,
     return true;
   }
 
+  // Attempt to blend with zero.
+  if (VT.getVectorNumElements() <= 8 &&
+      ((Subtarget.hasSSE41() && VT.is128BitVector()) ||
+       (Subtarget.hasAVX() && VT.is256BitVector()))) {
+    // Convert VT to a type compatible with X86ISD::BLENDI.
+    // TODO - add 16i16 support (requires lane duplication).
+    MVT ShuffleVT = VT;
+    if (Subtarget.hasAVX2()) {
+      if (VT == MVT::v4i64)
+        ShuffleVT = MVT::v8i32;
+      else if (VT == MVT::v2i64)
+        ShuffleVT = MVT::v4i32;
+    } else {
+      if (VT == MVT::v2i64 || VT == MVT::v4i32)
+        ShuffleVT = MVT::v8i16;
+      else if (VT == MVT::v4i64)
+        ShuffleVT = MVT::v4f64;
+      else if (VT == MVT::v8i32)
+        ShuffleVT = MVT::v8f32;
+    }
+
+    if (isSequentialOrUndefOrZeroInRange(Mask, /*Pos*/ 0, /*Size*/ Mask.size(),
+                                         /*Low*/ 0) &&
+        Mask.size() <= ShuffleVT.getVectorNumElements()) {
+      unsigned BlendMask = 0;
+      unsigned ShuffleSize = ShuffleVT.getVectorNumElements();
+      unsigned MaskRatio = ShuffleSize / Mask.size();
+
+      for (unsigned i = 0; i != ShuffleSize; ++i)
+        if (Mask[i / MaskRatio] < 0)
+          BlendMask |= 1u << i;
+
+      if (Root.getOpcode() != X86ISD::BLENDI ||
+          Root->getConstantOperandVal(2) != BlendMask) {
+        SDValue Zero = getZeroVector(ShuffleVT, Subtarget, DAG, DL);
+        Res = DAG.getBitcast(ShuffleVT, Input);
+        DCI.AddToWorklist(Res.getNode());
+        Res = DAG.getNode(X86ISD::BLENDI, DL, ShuffleVT, Res, Zero,
+                          DAG.getConstant(BlendMask, DL, MVT::i8));
+        DCI.AddToWorklist(Res.getNode());
+        DCI.CombineTo(Root.getNode(), DAG.getBitcast(RootVT, Res),
+                      /*AddTo*/ true);
+        return true;
+      }
+    }
+  }
+
   // Don't try to re-form single instruction chains under any circumstances now
   // that we've done encoding canonicalization for them.
   if (Depth < 2)
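To make the new combine's mask arithmetic concrete, the following is a standalone sketch (not LLVM code) of how the BLENDI immediate is derived. The sentinel constants stand in for LLVM's undef/zero shuffle-mask sentinels and are assumptions for illustration; the MaskRatio step models widening a narrow shuffle mask (e.g. 2 x i64) onto the lanes of the blend type (e.g. 8 x i16).

```cpp
#include <cassert>
#include <cstdio>
#include <vector>

// Stand-ins for LLVM's shuffle-mask sentinels (assumed values, illustration only).
constexpr int SentinelUndef = -1;
constexpr int SentinelZero  = -2;

static bool isUndefOrZero(int M) {
  return M == SentinelUndef || M == SentinelZero;
}

// Mirrors isSequentialOrUndefOrZeroInRange over a whole mask: every element
// is either undef/zero or matches the sequential index Low, Low+1, ...
static bool isSequentialOrUndefOrZero(const std::vector<int> &Mask, int Low) {
  for (int M : Mask) {
    if (!isUndefOrZero(M) && M != Low)
      return false;
    ++Low;
  }
  return true;
}

// Mirrors the blend-immediate loop: each blend lane whose (narrower) mask
// element is undef/zero takes its value from the zero vector.
static unsigned buildBlendMask(const std::vector<int> &Mask,
                               unsigned NumBlendLanes) {
  assert(NumBlendLanes % Mask.size() == 0 && "lanes must subdivide evenly");
  unsigned MaskRatio = NumBlendLanes / static_cast<unsigned>(Mask.size());
  unsigned BlendMask = 0;
  for (unsigned i = 0; i != NumBlendLanes; ++i)
    if (Mask[i / MaskRatio] < 0)
      BlendMask |= 1u << i;
  return BlendMask;
}

int main() {
  // A v2i64 shuffle mask <0, zero>, blended as v8i16 (MaskRatio = 4):
  std::vector<int> Mask = {0, SentinelZero};
  if (isSequentialOrUndefOrZero(Mask, /*Low=*/0))
    std::printf("BlendMask = 0x%02x\n", buildBlendMask(Mask, 8)); // prints 0xf0
  return 0;
}
```

In this example the upper four 16-bit lanes (covering the zeroed i64 element) are taken from the zero vector, giving the 0xF0 immediate that would be encoded into the X86ISD::BLENDI node.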

