diff options
Diffstat (limited to 'llvm/lib/Target')
| -rw-r--r-- | llvm/lib/Target/X86/X86ISelLowering.cpp | 64 |
1 files changed, 42 insertions, 22 deletions
diff --git a/llvm/lib/Target/X86/X86ISelLowering.cpp b/llvm/lib/Target/X86/X86ISelLowering.cpp
index 8b102e4fbb9..c6f73baeb2a 100644
--- a/llvm/lib/Target/X86/X86ISelLowering.cpp
+++ b/llvm/lib/Target/X86/X86ISelLowering.cpp
@@ -7182,21 +7182,6 @@ static bool isSingleInputShuffleMask(ArrayRef<int> Mask) {
   return true;
 }
 
-/// \brief Check wether all of one set of inputs to a shuffle mask are in place.
-///
-/// Mask entries pointing at the other input or undef will be skipped.
-static bool isShuffleMaskInputInPlace(ArrayRef<int> Mask, bool LoInput = true) {
-  int Size = Mask.size();
-  for (int i = 0; i < Size; ++i) {
-    int M = Mask[i];
-    if (M == -1 || (LoInput && M >= 4) || (!LoInput && M < 4))
-      continue;
-    if (M - (LoInput ? 0 : Size) != i)
-      return false;
-  }
-  return true;
-}
-
 // Hide this symbol with an anonymous namespace instead of 'static' so that MSVC
 // 2013 will allow us to use it as a non-type template parameter.
 namespace {
@@ -7385,13 +7370,48 @@ static SDValue lowerV4F32VectorShuffle(SDValue Op, SDValue V1, SDValue V2,
     // INSERTPS when the V1 elements are already in the correct locations
     // because otherwise we can just always use two SHUFPS instructions which
     // are much smaller to encode than a SHUFPS and an INSERTPS.
-    if (Subtarget->hasSSE41() &&
-        isShuffleMaskInputInPlace(Mask, /*LoInput*/ true)) {
-      // Insert the V2 element into the desired position.
-      SDValue InsertPSMask =
-          DAG.getIntPtrConstant(Mask[V2Index] << 6 | V2Index << 4);
-      return DAG.getNode(X86ISD::INSERTPS, DL, MVT::v4f32, V1, V2,
-                         InsertPSMask);
+    if (Subtarget->hasSSE41()) {
+      // When using INSERTPS we can zero any lane of the destination. Collect
+      // the zero inputs into a mask and drop them from the lanes of V1 which
+      // actually need to be present as inputs to the INSERTPS.
+      unsigned ZMask = 0;
+      if (ISD::isBuildVectorAllZeros(V1.getNode())) {
+        ZMask = 0xF ^ (1 << V2Index);
+      } else if (V1.getOpcode() == ISD::BUILD_VECTOR) {
+        for (int i = 0; i < 4; ++i) {
+          int M = Mask[i];
+          if (M >= 4)
+            continue;
+          if (M > -1) {
+            SDValue Input = V1.getOperand(M);
+            if (Input.getOpcode() != ISD::UNDEF &&
+                !X86::isZeroNode(Input)) {
+              // A non-zero input!
+              ZMask = 0;
+              break;
+            }
+          }
+          ZMask |= 1 << i;
+        }
+      }
+
+      // Synthesize a shuffle mask for the non-zero and non-v2 inputs.
+      int InsertShuffleMask[4] = {-1, -1, -1, -1};
+      for (int i = 0; i < 4; ++i)
+        if (i != V2Index && (ZMask & (1 << i)) == 0)
+          InsertShuffleMask[i] = Mask[i];
+
+      if (isNoopShuffleMask(InsertShuffleMask)) {
+        // Replace V1 with undef if nothing from V1 survives the INSERTPS.
+        if ((ZMask | 1 << V2Index) == 0xF)
+          V1 = DAG.getUNDEF(MVT::v4f32);
+
+        // Insert the V2 element into the desired position.
+        SDValue InsertPSMask =
+            DAG.getIntPtrConstant(Mask[V2Index] << 6 | V2Index << 4 | ZMask);
+        return DAG.getNode(X86ISD::INSERTPS, DL, MVT::v4f32, V1, V2,
+                           InsertPSMask);
+      }
     }
 
     // Compute the index adjacent to V2Index and in the same half by toggling

