diff options
| author | Chandler Carruth <chandlerc@gmail.com> | 2014-09-03 22:48:34 +0000 |
|---|---|---|
| committer | Chandler Carruth <chandlerc@gmail.com> | 2014-09-03 22:48:34 +0000 |
| commit | dad54003976281509c8bd096e7ccd88ea406f6d8 (patch) | |
| tree | da2ed185ad0c142a4ea36470da79d15edf9a46e0 /llvm/lib/Target/X86/X86ISelLowering.cpp | |
| parent | 2317311825e1f7464759895108d7723a153cbefb (diff) | |
| download | bcm5719-llvm-dad54003976281509c8bd096e7ccd88ea406f6d8.tar.gz bcm5719-llvm-dad54003976281509c8bd096e7ccd88ea406f6d8.zip | |
[x86] Teach the new vector shuffle lowering about the simplest of
'insertps' patterns.

This replaces two shuffles with a single insertps in very common cases.
My next patch will extend this to leverage the zeroing capabilities of
insertps which will allow it to be used in a much wider set of cases.

llvm-svn: 217100
Diffstat (limited to 'llvm/lib/Target/X86/X86ISelLowering.cpp')
| -rw-r--r-- | llvm/lib/Target/X86/X86ISelLowering.cpp | 29 |
1 files changed, 29 insertions, 0 deletions
```diff
diff --git a/llvm/lib/Target/X86/X86ISelLowering.cpp b/llvm/lib/Target/X86/X86ISelLowering.cpp
index b024f331d10..8b102e4fbb9 100644
--- a/llvm/lib/Target/X86/X86ISelLowering.cpp
+++ b/llvm/lib/Target/X86/X86ISelLowering.cpp
@@ -7182,6 +7182,21 @@ static bool isSingleInputShuffleMask(ArrayRef<int> Mask) {
   return true;
 }
 
+/// \brief Check wether all of one set of inputs to a shuffle mask are in place.
+///
+/// Mask entries pointing at the other input or undef will be skipped.
+static bool isShuffleMaskInputInPlace(ArrayRef<int> Mask, bool LoInput = true) {
+  int Size = Mask.size();
+  for (int i = 0; i < Size; ++i) {
+    int M = Mask[i];
+    if (M == -1 || (LoInput && M >= 4) || (!LoInput && M < 4))
+      continue;
+    if (M - (LoInput ? 0 : Size) != i)
+      return false;
+  }
+  return true;
+}
+
 // Hide this symbol with an anonymous namespace instead of 'static' so that MSVC
 // 2013 will allow us to use it as a non-type template parameter.
 namespace {
@@ -7365,6 +7380,20 @@ static SDValue lowerV4F32VectorShuffle(SDValue Op, SDValue V1, SDValue V2,
     int V2Index =
         std::find_if(Mask.begin(), Mask.end(), [](int M) { return M >= 4; }) -
         Mask.begin();
+
+    // Check for whether we can use INSERTPS to perform the blend. We only use
+    // INSERTPS when the V1 elements are already in the correct locations
+    // because otherwise we can just always use two SHUFPS instructions which
+    // are much smaller to encode than a SHUFPS and an INSERTPS.
+    if (Subtarget->hasSSE41() &&
+        isShuffleMaskInputInPlace(Mask, /*LoInput*/ true)) {
+      // Insert the V2 element into the desired position.
+      SDValue InsertPSMask =
+          DAG.getIntPtrConstant(Mask[V2Index] << 6 | V2Index << 4);
+      return DAG.getNode(X86ISD::INSERTPS, DL, MVT::v4f32, V1, V2,
+                         InsertPSMask);
+    }
+
     // Compute the index adjacent to V2Index and in the same half by toggling
     // the low bit.
     int V2AdjIndex = V2Index ^ 1;
```

