author    Chandler Carruth <chandlerc@gmail.com>  2014-09-21 12:01:19 +0000
committer Chandler Carruth <chandlerc@gmail.com>  2014-09-21 12:01:19 +0000
commit    88404c4f9b6388814863df8224910edd47b1ede6 (patch)
tree      3b23a18afb9b92f34a99b15466a653370f2036b6 /llvm/lib/Target/X86
parent    83252ac8f4cad5a716f705301e5abd9851cd469a (diff)
[x86] Begin teaching the new vector shuffle lowering among the most
important bits of cleverness: to detect and lower repeated shuffle
patterns between the two 128-bit lanes with a single instruction.

This patch just teaches it how to lower single-input shuffles that fit
this model using VPERMILPS. =] There is more that needs to happen here.

llvm-svn: 218211
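
To make the trick concrete: for the v8f32 mask <1,0,3,2,5,4,7,6>, both
128-bit lanes perform the same lane-relative shuffle <1,0,3,2>, so a single
VPERMILPS with imm8 0xB1 handles the whole 256-bit vector. Below is a minimal
standalone sketch of the repeated-lane test and the SHUFPS/VPERMILPS-style
immediate encoding; it is plain C++ with hypothetical helper names
(isLaneRepeated, imm8ForLaneMask), not the patch's LLVM API:

#include <cstdio>
#include <vector>

// Re-statement of the patch's check: every element beyond the first lane
// must repeat the first lane's selection, offset into its own lane.
// Entries < 0 are undef and match anything.
static bool isLaneRepeated(const std::vector<int> &Mask, int LaneSize) {
  for (int i = LaneSize, e = (int)Mask.size(); i < e; ++i)
    if (Mask[i] >= 0 &&
        Mask[i] != Mask[i % LaneSize] + (i / LaneSize) * LaneSize)
      return false;
  return true;
}

// Pack a 4-element lane mask into the two-bits-per-element imm8 encoding
// used by SHUFPS/VPERMILPS (element i lands in bits [2*i+1 : 2*i]).
static unsigned imm8ForLaneMask(const std::vector<int> &LoMask) {
  unsigned Imm = 0;
  for (int i = 0; i < 4; ++i)
    Imm |= (LoMask[i] < 0 ? 0u : (unsigned)LoMask[i]) << (2 * i);
  return Imm;
}

int main() {
  // v8f32 shuffle swapping adjacent pairs: <1,0,3,2> repeated in each lane.
  std::vector<int> Mask = {1, 0, 3, 2, 5, 4, 7, 6};
  if (isLaneRepeated(Mask, /*LaneSize=*/4)) {
    std::vector<int> LoMask(Mask.begin(), Mask.begin() + 4);
    std::printf("repeated; vpermilps imm8 = 0x%X\n", imm8ForLaneMask(LoMask));
  }
  return 0;
}

This prints 0xB1, which is <1,0,3,2> packed two bits per element:
0b10'11'00'01.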
Diffstat (limited to 'llvm/lib/Target/X86')
-rw-r--r--  llvm/lib/Target/X86/X86ISelLowering.cpp | 30 ++++++++++++++++++++++++++++--
1 file changed, 28 insertions(+), 2 deletions(-)
diff --git a/llvm/lib/Target/X86/X86ISelLowering.cpp b/llvm/lib/Target/X86/X86ISelLowering.cpp
index bfe52247b7a..78cbcc66127 100644
--- a/llvm/lib/Target/X86/X86ISelLowering.cpp
+++ b/llvm/lib/Target/X86/X86ISelLowering.cpp
@@ -9129,6 +9129,20 @@ static bool is128BitLaneCrossingShuffleMask(MVT VT, ArrayRef<int> Mask) {
   return false;
 }
 
+/// \brief Test whether a shuffle mask is equivalent within each 128-bit lane.
+///
+/// This checks a shuffle mask to see if it is performing the same
+/// 128-bit lane-relative shuffle in each 128-bit lane. This trivially implies
+/// that it is also not lane-crossing.
+static bool is128BitLaneRepeatedShuffleMask(MVT VT, ArrayRef<int> Mask) {
+  int LaneSize = 128 / VT.getScalarSizeInBits();
+  int Size = Mask.size();
+  for (int i = LaneSize; i < Size; ++i)
+    if (Mask[i] >= 0 && Mask[i] != (Mask[i % LaneSize] + (i / LaneSize) * LaneSize))
+      return false;
+  return true;
+}
+
 /// \brief Generic routine to split a 256-bit vector shuffle into 128-bit
 /// shuffles.
 ///
@@ -9316,14 +9330,26 @@ static SDValue lowerV8F32VectorShuffle(SDValue Op, SDValue V1, SDValue V2,
   ArrayRef<int> Mask = SVOp->getMask();
   assert(Mask.size() == 8 && "Unexpected mask size for v8 shuffle!");
 
-  if (is128BitLaneCrossingShuffleMask(MVT::v8f32, Mask) ||
-      isSingleInputShuffleMask(Mask))
+  if (is128BitLaneCrossingShuffleMask(MVT::v8f32, Mask))
     return splitAndLower256BitVectorShuffle(Op, V1, V2, Subtarget, DAG);
 
   if (SDValue Blend =
           lowerVectorShuffleAsBlend(DL, MVT::v8f32, V1, V2, Mask, DAG))
     return Blend;
 
+  // If the shuffle mask is repeated in each 128-bit lane, we have many more
+  // options to efficiently lower the shuffle.
+  if (is128BitLaneRepeatedShuffleMask(MVT::v8f32, Mask)) {
+    ArrayRef<int> LoMask = Mask.slice(0, 4);
+    if (isSingleInputShuffleMask(Mask))
+      return DAG.getNode(X86ISD::VPERMILP, DL, MVT::v8f32, V1,
+                         getV4X86ShuffleImm8ForMask(LoMask, DAG));
+  }
+
+  if (isSingleInputShuffleMask(Mask))
+    // FIXME: We can do better than just falling back blindly.
+    return splitAndLower256BitVectorShuffle(Op, V1, V2, Subtarget, DAG);
+
   // Shuffle the input elements into the desired positions in V1 and V2 and
   // blend them together.
   int V1Mask[] = {-1, -1, -1, -1, -1, -1, -1, -1};
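
For reference, here is a scalar model of what the immediate form of VPERMILPS
computes (an illustrative sketch written for this page, not code from the
patch): the selected four-element permutation is applied to each 128-bit lane
independently, with no way for elements to cross lanes, which is exactly why
is128BitLaneRepeatedShuffleMask is the right precondition for emitting it:

#include <array>
#include <cstdio>

// Scalar model of VPERMILPS ymm, ymm, imm8: destination element i of each
// lane is source element ((imm8 >> (2*i)) & 3) of the SAME lane.
static std::array<float, 8> vpermilps(const std::array<float, 8> &V,
                                      unsigned Imm8) {
  std::array<float, 8> R{};
  for (int Lane = 0; Lane < 2; ++Lane)   // two 128-bit lanes
    for (int i = 0; i < 4; ++i)          // four floats per lane
      R[Lane * 4 + i] = V[Lane * 4 + ((Imm8 >> (2 * i)) & 3)];
  return R;
}

int main() {
  std::array<float, 8> V = {0, 1, 2, 3, 4, 5, 6, 7};
  for (float F : vpermilps(V, 0xB1))     // applies <1,0,3,2> in each lane
    std::printf("%g ", F);               // prints: 1 0 3 2 5 4 7 6
  std::printf("\n");
  return 0;
}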