summaryrefslogtreecommitdiffstats
path: root/llvm/lib
diff options
context:
space:
mode:
Diffstat (limited to 'llvm/lib')
-rw-r--r--llvm/lib/Target/X86/X86ISelLowering.cpp42
1 files changed, 30 insertions, 12 deletions
diff --git a/llvm/lib/Target/X86/X86ISelLowering.cpp b/llvm/lib/Target/X86/X86ISelLowering.cpp
index a556f35008c..06808a5e111 100644
--- a/llvm/lib/Target/X86/X86ISelLowering.cpp
+++ b/llvm/lib/Target/X86/X86ISelLowering.cpp
@@ -9599,23 +9599,41 @@ static SDValue lowerV16I8VectorShuffle(SDValue Op, SDValue V1, SDValue V2,
if (Subtarget->hasSSSE3()) {
SDValue V1Mask[16];
SDValue V2Mask[16];
- for (int i = 0; i < 16; ++i)
+ bool V1InUse = false;
+ bool V2InUse = false;
+ SmallBitVector Zeroable = computeZeroableShuffleElements(Mask, V1, V2);
+
+ for (int i = 0; i < 16; ++i) {
if (Mask[i] == -1) {
V1Mask[i] = V2Mask[i] = DAG.getUNDEF(MVT::i8);
} else {
- V1Mask[i] = DAG.getConstant(Mask[i] < 16 ? Mask[i] : 0x80, MVT::i8);
- V2Mask[i] =
- DAG.getConstant(Mask[i] < 16 ? 0x80 : Mask[i] - 16, MVT::i8);
+ const int ZeroMask = 0x80;
+ int V1Idx = (Mask[i] < 16 ? Mask[i] : ZeroMask);
+ int V2Idx = (Mask[i] < 16 ? ZeroMask : Mask[i] - 16);
+ if (Zeroable[i])
+ V1Idx = V2Idx = ZeroMask;
+ V1Mask[i] = DAG.getConstant(V1Idx, MVT::i8);
+ V2Mask[i] = DAG.getConstant(V2Idx, MVT::i8);
+ V1InUse |= (ZeroMask != V1Idx);
+ V2InUse |= (ZeroMask != V2Idx);
}
- V1 = DAG.getNode(X86ISD::PSHUFB, DL, MVT::v16i8, V1,
- DAG.getNode(ISD::BUILD_VECTOR, DL, MVT::v16i8, V1Mask));
- if (isSingleInputShuffleMask(Mask))
- return V1; // Single inputs are easy.
+ }
+ assert((V1InUse || V2InUse) && "Shuffling to a zeroable vector");
- // Otherwise, blend the two.
- V2 = DAG.getNode(X86ISD::PSHUFB, DL, MVT::v16i8, V2,
- DAG.getNode(ISD::BUILD_VECTOR, DL, MVT::v16i8, V2Mask));
- return DAG.getNode(ISD::OR, DL, MVT::v16i8, V1, V2);
+ if (V1InUse)
+ V1 = DAG.getNode(X86ISD::PSHUFB, DL, MVT::v16i8, V1,
+ DAG.getNode(ISD::BUILD_VECTOR, DL, MVT::v16i8, V1Mask));
+ if (V2InUse)
+ V2 = DAG.getNode(X86ISD::PSHUFB, DL, MVT::v16i8, V2,
+ DAG.getNode(ISD::BUILD_VECTOR, DL, MVT::v16i8, V2Mask));
+
+ // If we need shuffled inputs from both, blend the two.
+ if (V1InUse && V2InUse)
+ return DAG.getNode(ISD::OR, DL, MVT::v16i8, V1, V2);
+ if (V1InUse)
+ return V1; // Single inputs are easy.
+ if (V2InUse)
+ return V2; // Single inputs are easy.
}
// There are special ways we can lower some single-element blends.
OpenPOWER on IntegriCloud