author     Simon Pilgrim <llvm-dev@redking.me.uk>    2016-02-24 15:14:21 +0000
committer  Simon Pilgrim <llvm-dev@redking.me.uk>    2016-02-24 15:14:21 +0000
commit     3b6feeaa7c88598657a8d74fe9f7aeafecf80756 (patch)
tree       880ea1b785562e4ef34b6b2c5da4cfa9b9a44f3c /llvm/lib
parent     ef10cd7f4674d4bf00a788d77173f7bedaf63630 (diff)
[X86][SSE41] Combine vector blends with zero
Part 2 of 2. This patch adds support for combining target shuffles into blends-with-zero.

Differential Revision: http://reviews.llvm.org/D17483

llvm-svn: 261745
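For context, the pattern this combine targets can be sketched with SSE4.1 intrinsics: a shuffle whose "other" lanes all come from a zero vector can be emitted as a single blend against zero. The helper below is only an illustration and is not part of the patch (the name zero_upper_half is hypothetical); it keeps the low two i32 lanes of a 128-bit vector and zeroes the upper two, which PBLENDW can express with an 8-bit per-word immediate.

#include <smmintrin.h>  // SSE4.1: _mm_blend_epi16, _mm_setzero_si128

// Hypothetical blend-with-zero example: keep the low two i32 lanes of 'x'
// and force the upper two lanes to zero. Each immediate bit selects, per
// i16 lane, whether the result comes from 'x' (bit = 0) or from the zero
// vector (bit = 1); bits 4-7 cover the upper two i32 lanes.
static inline __m128i zero_upper_half(__m128i x) {
  return _mm_blend_epi16(x, _mm_setzero_si128(), 0xF0);
}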
Diffstat (limited to 'llvm/lib')
-rw-r--r--  llvm/lib/Target/X86/X86ISelLowering.cpp   58
1 file changed, 58 insertions(+), 0 deletions(-)
diff --git a/llvm/lib/Target/X86/X86ISelLowering.cpp b/llvm/lib/Target/X86/X86ISelLowering.cpp
index c7b4fa5ec6a..3ffd6666f55 100644
--- a/llvm/lib/Target/X86/X86ISelLowering.cpp
+++ b/llvm/lib/Target/X86/X86ISelLowering.cpp
@@ -4332,6 +4332,17 @@ static bool isSequentialOrUndefInRange(ArrayRef<int> Mask,
return true;
}
+/// Return true if every element in Mask, beginning
+/// from position Pos and ending in Pos+Size, falls within the specified
+/// sequential range [Low, Low+Size), or is undef or zero.
+static bool isSequentialOrUndefOrZeroInRange(ArrayRef<int> Mask, unsigned Pos,
+ unsigned Size, int Low) {
+ for (unsigned i = Pos, e = Pos + Size; i != e; ++i, ++Low)
+ if (!isUndefOrZero(Mask[i]) && Mask[i] != Low)
+ return false;
+ return true;
+}
+
/// Return true if the specified EXTRACT_SUBVECTOR operand specifies a vector
/// extract that is suitable for instruction that extract 128 or 256 bit vectors
static bool isVEXTRACTIndex(SDNode *N, unsigned vecWidth) {
@@ -23666,6 +23677,53 @@ static bool combineX86ShuffleChain(SDValue Input, SDValue Root,
return true;
}
+ // Attempt to blend with zero.
+ if (VT.getVectorNumElements() <= 8 &&
+ ((Subtarget.hasSSE41() && VT.is128BitVector()) ||
+ (Subtarget.hasAVX() && VT.is256BitVector()))) {
+ // Convert VT to a type compatible with X86ISD::BLENDI.
+ // TODO - add 16i16 support (requires lane duplication).
+ MVT ShuffleVT = VT;
+ if (Subtarget.hasAVX2()) {
+ if (VT == MVT::v4i64)
+ ShuffleVT = MVT::v8i32;
+ else if (VT == MVT::v2i64)
+ ShuffleVT = MVT::v4i32;
+ } else {
+ if (VT == MVT::v2i64 || VT == MVT::v4i32)
+ ShuffleVT = MVT::v8i16;
+ else if (VT == MVT::v4i64)
+ ShuffleVT = MVT::v4f64;
+ else if (VT == MVT::v8i32)
+ ShuffleVT = MVT::v8f32;
+ }
+
+ if (isSequentialOrUndefOrZeroInRange(Mask, /*Pos*/ 0, /*Size*/ Mask.size(),
+ /*Low*/ 0) &&
+ Mask.size() <= ShuffleVT.getVectorNumElements()) {
+ unsigned BlendMask = 0;
+ unsigned ShuffleSize = ShuffleVT.getVectorNumElements();
+ unsigned MaskRatio = ShuffleSize / Mask.size();
+
+ for (unsigned i = 0; i != ShuffleSize; ++i)
+ if (Mask[i / MaskRatio] < 0)
+ BlendMask |= 1u << i;
+
+ if (Root.getOpcode() != X86ISD::BLENDI ||
+ Root->getConstantOperandVal(2) != BlendMask) {
+ SDValue Zero = getZeroVector(ShuffleVT, Subtarget, DAG, DL);
+ Res = DAG.getBitcast(ShuffleVT, Input);
+ DCI.AddToWorklist(Res.getNode());
+ Res = DAG.getNode(X86ISD::BLENDI, DL, ShuffleVT, Res, Zero,
+ DAG.getConstant(BlendMask, DL, MVT::i8));
+ DCI.AddToWorklist(Res.getNode());
+ DCI.CombineTo(Root.getNode(), DAG.getBitcast(RootVT, Res),
+ /*AddTo*/ true);
+ return true;
+ }
+ }
+ }
+
// Don't try to re-form single instruction chains under any circumstances now
// that we've done encoding canonicalization for them.
if (Depth < 2)
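As a rough illustration of how the new code derives the BLENDI immediate, the standalone sketch below (not part of the patch) mirrors the MaskRatio loop: the target-shuffle mask may have fewer, wider elements than the blend type, so each mask element covers MaskRatio blend lanes, and a bit is set for every lane whose source element is undef or zero (represented by negative sentinels), i.e. every lane that should be taken from the zero vector.

#include <cassert>
#include <cstdio>
#include <vector>

// Standalone sketch of the BlendMask computation in the hunk above.
// Negative mask elements stand in for the SM_SentinelUndef/SM_SentinelZero
// sentinels, i.e. lanes that may be taken from the zero vector.
static unsigned computeBlendMask(const std::vector<int> &Mask,
                                 unsigned ShuffleSize) {
  assert(ShuffleSize % Mask.size() == 0 && "blend type must be a multiple");
  unsigned MaskRatio = ShuffleSize / Mask.size();
  unsigned BlendMask = 0;
  for (unsigned i = 0; i != ShuffleSize; ++i)
    if (Mask[i / MaskRatio] < 0)   // undef or zero -> take from zero vector
      BlendMask |= 1u << i;
  return BlendMask;
}

int main() {
  // A v2i64 mask {0, zero} widened to a v8i16 blend: the upper four i16
  // lanes come from the zero vector, giving the 0xF0 immediate that pairs
  // with a PBLENDW against zero.
  std::vector<int> Mask = {0, -2};
  std::printf("0x%02X\n", computeBlendMask(Mask, 8));  // prints 0xF0
  return 0;
}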