| author | Chandler Carruth <chandlerc@gmail.com> | 2014-09-21 12:01:19 +0000 |
|---|---|---|
| committer | Chandler Carruth <chandlerc@gmail.com> | 2014-09-21 12:01:19 +0000 |
| commit | 88404c4f9b6388814863df8224910edd47b1ede6 (patch) | |
| tree | 3b23a18afb9b92f34a99b15466a653370f2036b6 | /llvm/lib/Target/X86 |
| parent | 83252ac8f4cad5a716f705301e5abd9851cd469a (diff) | |
| download | bcm5719-llvm-88404c4f9b6388814863df8224910edd47b1ede6.tar.gz | bcm5719-llvm-88404c4f9b6388814863df8224910edd47b1ede6.zip |
[x86] Begin teaching the new vector shuffle lowering one of the most
important bits of cleverness: how to detect and lower repeated shuffle
patterns between the two 128-bit lanes with a single instruction.
This patch just teaches it how to lower single-input shuffles that fit
this model using VPERMILPS. =] There is more that needs to happen here.
llvm-svn: 218211
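
For a concrete feel of the check this change introduces, here is a minimal standalone sketch in plain C++. The helper name `isLaneRepeatedMask` and the `std::vector<int>` mask representation are illustrative only, not the in-tree LLVM types; the logic mirrors the `is128BitLaneRepeatedShuffleMask` predicate added below. For v8f32, a mask qualifies when both 128-bit lanes perform the same 4-element shuffle.

```cpp
#include <cstdio>
#include <vector>

// Sketch of the lane-repetition test: a mask is "repeated" if every element
// is either undefined (< 0) or equal to the corresponding low-lane element
// shifted into its own 128-bit lane. LaneSize is the number of elements per
// 128-bit lane (4 for v8f32).
static bool isLaneRepeatedMask(const std::vector<int> &Mask, int LaneSize) {
  for (int i = LaneSize, e = static_cast<int>(Mask.size()); i < e; ++i)
    if (Mask[i] >= 0 &&
        Mask[i] != Mask[i % LaneSize] + (i / LaneSize) * LaneSize)
      return false;
  return true;
}

int main() {
  // v8f32 mask performing <1,0,3,2> within each 128-bit lane.
  std::vector<int> Repeated    = {1, 0, 3, 2, 5, 4, 7, 6};
  // v8f32 mask whose second lane does something different.
  std::vector<int> NotRepeated = {1, 0, 3, 2, 4, 5, 6, 7};
  std::printf("%d %d\n", isLaneRepeatedMask(Repeated, 4),
              isLaneRepeatedMask(NotRepeated, 4)); // prints "1 0"
}
```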
Diffstat (limited to 'llvm/lib/Target/X86')
| -rw-r--r-- | llvm/lib/Target/X86/X86ISelLowering.cpp | 30 |
1 file changed, 28 insertions, 2 deletions
diff --git a/llvm/lib/Target/X86/X86ISelLowering.cpp b/llvm/lib/Target/X86/X86ISelLowering.cpp
index bfe52247b7a..78cbcc66127 100644
--- a/llvm/lib/Target/X86/X86ISelLowering.cpp
+++ b/llvm/lib/Target/X86/X86ISelLowering.cpp
@@ -9129,6 +9129,20 @@ static bool is128BitLaneCrossingShuffleMask(MVT VT, ArrayRef<int> Mask) {
   return false;
 }
 
+/// \brief Test whether a shuffle mask is equivalent within each 128-bit lane.
+///
+/// This checks a shuffle mask to see if it is performing the same
+/// 128-bit lane-relative shuffle in each 128-bit lane. This trivially implies
+/// that it is also not lane-crossing.
+static bool is128BitLaneRepeatedShuffleMask(MVT VT, ArrayRef<int> Mask) {
+  int LaneSize = 128 / VT.getScalarSizeInBits();
+  int Size = Mask.size();
+  for (int i = LaneSize; i < Size; ++i)
+    if (Mask[i] >= 0 && Mask[i] != (Mask[i % LaneSize] + (i / LaneSize) * LaneSize))
+      return false;
+  return true;
+}
+
 /// \brief Generic routine to split a 256-bit vector shuffle into 128-bit
 /// shuffles.
 ///
@@ -9316,14 +9330,26 @@ static SDValue lowerV8F32VectorShuffle(SDValue Op, SDValue V1, SDValue V2,
   ArrayRef<int> Mask = SVOp->getMask();
   assert(Mask.size() == 8 && "Unexpected mask size for v8 shuffle!");
 
-  if (is128BitLaneCrossingShuffleMask(MVT::v8f32, Mask) ||
-      isSingleInputShuffleMask(Mask))
+  if (is128BitLaneCrossingShuffleMask(MVT::v8f32, Mask))
     return splitAndLower256BitVectorShuffle(Op, V1, V2, Subtarget, DAG);
 
   if (SDValue Blend = lowerVectorShuffleAsBlend(DL, MVT::v8f32, V1, V2, Mask,
                                                 DAG))
     return Blend;
 
+  // If the shuffle mask is repeated in each 128-bit lane, we have many more
+  // options to efficiently lower the shuffle.
+  if (is128BitLaneRepeatedShuffleMask(MVT::v8f32, Mask)) {
+    ArrayRef<int> LoMask = Mask.slice(0, 4);
+    if (isSingleInputShuffleMask(Mask))
+      return DAG.getNode(X86ISD::VPERMILP, DL, MVT::v8f32, V1,
+                         getV4X86ShuffleImm8ForMask(LoMask, DAG));
+  }
+
+  if (isSingleInputShuffleMask(Mask))
+    // FIXME: We can do better than just falling back blindly.
+    return splitAndLower256BitVectorShuffle(Op, V1, V2, Subtarget, DAG);
+
   // Shuffle the input elements into the desired positions in V1 and V2 and
   // blend them together.
   int V1Mask[] = {-1, -1, -1, -1, -1, -1, -1, -1};
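
The lowering itself reuses `getV4X86ShuffleImm8ForMask` to turn the low-lane mask into the 8-bit immediate that VPERMILPS consumes: two bits per destination element, with element i selecting the source element encoded in bits [2i+1:2i]. Below is a rough standalone sketch of that packing; how the in-tree helper maps undefined mask entries is not visible in this diff, so treating them as identity here is an assumption, and the helper name in the sketch is hypothetical.

```cpp
#include <cassert>
#include <cstdio>

// Sketch of packing a 4-element in-lane shuffle mask into the 8-bit immediate
// used by VPERMILPS/SHUFPS-style instructions: destination element i selects
// source element (Imm >> (2 * i)) & 0x3. Undefined entries (< 0) are mapped
// to the identity here, which is one arbitrary-but-valid choice for undef.
static unsigned shuffleImm8ForMask(const int Mask[4]) {
  unsigned Imm = 0;
  for (int i = 0; i < 4; ++i) {
    int M = Mask[i] < 0 ? i : Mask[i];
    assert(M < 4 && "in-lane mask element out of range");
    Imm |= static_cast<unsigned>(M) << (2 * i);
  }
  return Imm;
}

int main() {
  int LoMask[4] = {1, 0, 3, 2}; // swap adjacent pairs within the lane
  std::printf("0x%02x\n", shuffleImm8ForMask(LoMask)); // prints "0xb1"
}
```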

