summaryrefslogtreecommitdiffstats
path: root/llvm/lib/Target
diff options
context:
space:
mode:
authorSimon Pilgrim <llvm-dev@redking.me.uk>2015-02-03 21:58:29 +0000
committerSimon Pilgrim <llvm-dev@redking.me.uk>2015-02-03 21:58:29 +0000
commit46cd4f74005b2db54399ed244f9945232d07e7be (patch)
treea9cff7138318ecdcd66015cb7f440ed1817a6946 /llvm/lib/Target
parentfe88b189905e95027a747a653bf77ed03869208b (diff)
downloadbcm5719-llvm-46cd4f74005b2db54399ed244f9945232d07e7be.tar.gz
bcm5719-llvm-46cd4f74005b2db54399ed244f9945232d07e7be.zip
[X86][SSE] psrl(w/d/q) and psll(w/d/q) bit shifts for SSE2
Patch to match cases where shuffle masks can be reduced to bit shifts. Similar to byte shift shuffle matching from D5699. Differential Revision: http://reviews.llvm.org/D6649 llvm-svn: 228047
Diffstat (limited to 'llvm/lib/Target')
-rw-r--r--llvm/lib/Target/X86/X86ISelLowering.cpp108
1 files changed, 108 insertions, 0 deletions
diff --git a/llvm/lib/Target/X86/X86ISelLowering.cpp b/llvm/lib/Target/X86/X86ISelLowering.cpp
index 4f00beacd32..ea6dcf51c63 100644
--- a/llvm/lib/Target/X86/X86ISelLowering.cpp
+++ b/llvm/lib/Target/X86/X86ISelLowering.cpp
@@ -7808,6 +7808,79 @@ static SDValue lowerVectorShuffleAsByteShift(SDLoc DL, MVT VT, SDValue V1,
return SDValue();
}
+/// \brief Try to lower a vector shuffle as a bit shift (shifts in zeros).
+///
+/// Attempts to match a shuffle mask against the PSRL(W/D/Q) and PSLL(W/D/Q)
+/// SSE2 and AVX2 logical bit-shift instructions. The function matches
+/// elements from one of the input vectors shuffled to the left or right
+/// with zeroable elements 'shifted in'.
+///
+/// \returns the shifted input bitcast back to \p VT, or an empty SDValue if
+/// the mask does not match any of the tested shifts.
+static SDValue lowerVectorShuffleAsBitShift(SDLoc DL, MVT VT, SDValue V1,
+                                            SDValue V2, ArrayRef<int> Mask,
+                                            SelectionDAG &DAG) {
+  SmallBitVector Zeroable = computeZeroableShuffleElements(Mask, V1, V2);
+
+  int Size = Mask.size();
+  // Cast explicitly: getVectorNumElements() is unsigned and Size is signed.
+  assert(Size == static_cast<int>(VT.getVectorNumElements()) &&
+         "Unexpected mask size");
+
+  // PSRL : (little-endian) right bit shift.
+  // [ 1, zz, 3, zz]
+  // [ -1, -1, 7, zz]
+  // PSLL : (little-endian) left bit shift.
+  // [ zz, 0, zz, 2 ]
+  // [ -1, 4, zz, -1 ]
+  //
+  // Treats every run of Scale adjacent mask elements as one wider integer
+  // element and tests whether the mask moves the contents of each run by
+  // Shift (narrow) elements.
+  auto MatchBitShift = [&](int Shift, int Scale) -> SDValue {
+    MVT ShiftSVT = MVT::getIntegerVT(VT.getScalarSizeInBits() * Scale);
+    MVT ShiftVT = MVT::getVectorVT(ShiftSVT, Size / Scale);
+    // Query the lowering info inside the assert so that no local variable is
+    // left unused when asserts are compiled out.
+    assert(DAG.getTargetLoweringInfo().isTypeLegal(ShiftVT) &&
+           "Illegal integer vector type");
+
+    // A left shift needs the first Shift elements of every run to be
+    // zeroable; a right shift needs the last Shift elements to be zeroable.
+    bool MatchLeft = true, MatchRight = true;
+    for (int i = 0; i != Size; i += Scale) {
+      for (int j = 0; j != Shift; j++) {
+        MatchLeft &= Zeroable[i + j];
+      }
+      for (int j = Scale - Shift; j != Scale; j++) {
+        MatchRight &= Zeroable[i + j];
+      }
+    }
+    if (!(MatchLeft || MatchRight))
+      return SDValue();
+
+    // The remaining Scale - Shift elements of every run must pass
+    // isSequentialOrUndefInRange against a single input; V2 indices are
+    // offset by Size. If both directions are zeroable, only the left shift
+    // is checked.
+    bool MatchV1 = true, MatchV2 = true;
+    for (int i = 0; i != Size; i += Scale) {
+      unsigned Pos = MatchLeft ? i + Shift : i;
+      unsigned Low = MatchLeft ? i : i + Shift;
+      unsigned Len = Scale - Shift;
+      MatchV1 &= isSequentialOrUndefInRange(Mask, Pos, Len, Low);
+      MatchV2 &= isSequentialOrUndefInRange(Mask, Pos, Len, Low + Size);
+    }
+    if (!(MatchV1 || MatchV2))
+      return SDValue();
+
+    // Cast the inputs to ShiftVT to match VSRLI/VSHLI and back again.
+    unsigned OpCode = MatchLeft ? X86ISD::VSHLI : X86ISD::VSRLI;
+    // Shift counts elements of VT; convert it to a bit count.
+    int ShiftAmt = Shift * VT.getScalarSizeInBits();
+    SDValue V = MatchV1 ? V1 : V2;
+    V = DAG.getNode(ISD::BITCAST, DL, ShiftVT, V);
+    V = DAG.getNode(OpCode, DL, ShiftVT, V, DAG.getConstant(ShiftAmt, MVT::i8));
+    return DAG.getNode(ISD::BITCAST, DL, VT, V);
+  };
+
+  // SSE/AVX supports logical shifts up to 64-bit integers - so we can just
+  // keep doubling the size of the integer elements up to that. We can
+  // then shift the elements of the integer vector by whole multiples of
+  // their width within the elements of the larger integer vector. Test each
+  // multiple to see if we can find a match with the moved element indices
+  // and that the shifted in elements are all zeroable.
+  for (int Scale = 2; Scale * VT.getScalarSizeInBits() <= 64; Scale *= 2)
+    for (int Shift = 1; Shift != Scale; Shift++)
+      if (SDValue BitShift = MatchBitShift(Shift, Scale))
+        return BitShift;
+
+  // no match
+  return SDValue();
+}
+
/// \brief Lower a vector shuffle as a zero or any extension.
///
/// Given a specific number of elements, element bit width, and extension
@@ -8654,6 +8727,11 @@ static SDValue lowerV4I32VectorShuffle(SDValue Op, SDValue V1, SDValue V2,
getV4X86ShuffleImm8ForMask(Mask, DAG));
}
+ // Try to use bit shift instructions.
+ if (SDValue Shift = lowerVectorShuffleAsBitShift(
+ DL, MVT::v4i32, V1, V2, Mask, DAG))
+ return Shift;
+
// Try to use byte shift instructions.
if (SDValue Shift = lowerVectorShuffleAsByteShift(
DL, MVT::v4i32, V1, V2, Mask, DAG))
@@ -8739,6 +8817,11 @@ static SDValue lowerV8I16SingleInputVectorShuffle(
Mask, Subtarget, DAG))
return Broadcast;
+ // Try to use bit shift instructions.
+ if (SDValue Shift = lowerVectorShuffleAsBitShift(
+ DL, MVT::v8i16, V, V, Mask, DAG))
+ return Shift;
+
// Try to use byte shift instructions.
if (SDValue Shift = lowerVectorShuffleAsByteShift(
DL, MVT::v8i16, V, V, Mask, DAG))
@@ -9356,6 +9439,11 @@ static SDValue lowerV8I16VectorShuffle(SDValue Op, SDValue V1, SDValue V2,
assert(NumV1Inputs > 0 && "All single-input shuffles should be canonicalized "
"to be V1-input shuffles.");
+ // Try to use bit shift instructions.
+ if (SDValue Shift = lowerVectorShuffleAsBitShift(
+ DL, MVT::v8i16, V1, V2, Mask, DAG))
+ return Shift;
+
// Try to use byte shift instructions.
if (SDValue Shift = lowerVectorShuffleAsByteShift(
DL, MVT::v8i16, V1, V2, Mask, DAG))
@@ -9512,6 +9600,11 @@ static SDValue lowerV16I8VectorShuffle(SDValue Op, SDValue V1, SDValue V2,
ArrayRef<int> OrigMask = SVOp->getMask();
assert(OrigMask.size() == 16 && "Unexpected mask size for v16 shuffle!");
+ // Try to use bit shift instructions.
+ if (SDValue Shift = lowerVectorShuffleAsBitShift(
+ DL, MVT::v16i8, V1, V2, OrigMask, DAG))
+ return Shift;
+
// Try to use byte shift instructions.
if (SDValue Shift = lowerVectorShuffleAsByteShift(
DL, MVT::v16i8, V1, V2, OrigMask, DAG))
@@ -10602,6 +10695,11 @@ static SDValue lowerV8I32VectorShuffle(SDValue Op, SDValue V1, SDValue V2,
DAG.getNode(ISD::BUILD_VECTOR, DL, MVT::v8i32, VPermMask), V1);
}
+ // Try to use bit shift instructions.
+ if (SDValue Shift = lowerVectorShuffleAsBitShift(
+ DL, MVT::v8i32, V1, V2, Mask, DAG))
+ return Shift;
+
// Try to simplify this by merging 128-bit lanes to enable a lane-based
// shuffle.
if (SDValue Result = lowerVectorShuffleByMerging128BitLanes(
@@ -10685,6 +10783,11 @@ static SDValue lowerV16I16VectorShuffle(SDValue Op, SDValue V1, SDValue V2,
DAG.getNode(ISD::BUILD_VECTOR, DL, MVT::v32i8, PSHUFBMask)));
}
+ // Try to use bit shift instructions.
+ if (SDValue Shift = lowerVectorShuffleAsBitShift(
+ DL, MVT::v16i16, V1, V2, Mask, DAG))
+ return Shift;
+
// Try to simplify this by merging 128-bit lanes to enable a lane-based
// shuffle.
if (SDValue Result = lowerVectorShuffleByMerging128BitLanes(
@@ -10763,6 +10866,11 @@ static SDValue lowerV32I8VectorShuffle(SDValue Op, SDValue V1, SDValue V2,
DAG.getNode(ISD::BUILD_VECTOR, DL, MVT::v32i8, PSHUFBMask));
}
+ // Try to use bit shift instructions.
+ if (SDValue Shift = lowerVectorShuffleAsBitShift(
+ DL, MVT::v32i8, V1, V2, Mask, DAG))
+ return Shift;
+
// Try to simplify this by merging 128-bit lanes to enable a lane-based
// shuffle.
if (SDValue Result = lowerVectorShuffleByMerging128BitLanes(
OpenPOWER on IntegriCloud