Diffstat (limited to 'llvm/lib/Target/X86/X86ISelLowering.cpp')
 llvm/lib/Target/X86/X86ISelLowering.cpp | 33 +++++++++++++++++++++++++--------
 1 file changed, 25 insertions(+), 8 deletions(-)
diff --git a/llvm/lib/Target/X86/X86ISelLowering.cpp b/llvm/lib/Target/X86/X86ISelLowering.cpp
index b0efc4d1ed5..7f49e6cc206 100644
--- a/llvm/lib/Target/X86/X86ISelLowering.cpp
+++ b/llvm/lib/Target/X86/X86ISelLowering.cpp
@@ -4975,6 +4975,11 @@ static bool canWidenShuffleElements(ArrayRef<int> Mask,
   return true;
 }
 
+static bool canWidenShuffleElements(ArrayRef<int> Mask) {
+  SmallVector<int, 32> WidenedMask;
+  return canWidenShuffleElements(Mask, WidenedMask);
+}
+
 /// Returns true if Elt is a constant zero or a floating point constant +0.0.
 bool X86::isZeroNode(SDValue Elt) {
   return isNullConstant(Elt) || isNullFPConstant(Elt);
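
Note: "widening" here means checking whether each adjacent pair of mask indices addresses an aligned element pair in the source, so the shuffle can be re-expressed on half as many double-width elements; the new overload simply discards the widened mask when the caller only needs the yes/no answer. A minimal standalone sketch of the test, assuming no undef/zero sentinel indices (which the real helper does handle) and std::vector in place of ArrayRef:

#include <vector>

// True if Mask over N elements can be rewritten over N/2 double-width
// elements: e.g. <0,1,6,7> widens to <0,3>, but <0,2,1,3> does not.
static bool canWidenMaskSketch(const std::vector<int> &Mask) {
  if (Mask.size() % 2 != 0)
    return false;
  for (size_t I = 0; I + 1 < Mask.size(); I += 2) {
    // Each pair must be an aligned (even, even+1) run of source indices.
    if (Mask[I] % 2 != 0 || Mask[I + 1] != Mask[I] + 1)
      return false;
  }
  return true;
}
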
@@ -8954,6 +8959,12 @@ is128BitLaneRepeatedShuffleMask(MVT VT, ArrayRef<int> Mask,
   return isRepeatedShuffleMask(128, VT, Mask, RepeatedMask);
 }
 
+static bool
+is128BitLaneRepeatedShuffleMask(MVT VT, ArrayRef<int> Mask) {
+  SmallVector<int, 32> RepeatedMask;
+  return isRepeatedShuffleMask(128, VT, Mask, RepeatedMask);
+}
+
 /// Test whether a shuffle mask is equivalent within each 256-bit lane.
 static bool
 is256BitLaneRepeatedShuffleMask(MVT VT, ArrayRef<int> Mask,
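
The property being tested — the mask replays the same pattern in every 128-bit lane — can be modelled as below. This is a simplified single-input sketch; the LLVM routine also tracks undef elements and indices into a second operand:

#include <vector>

// True if every lane repeats lane 0's pattern shifted by the lane base.
// With 4 elements per 128-bit lane, <1,0,3,2, 5,4,7,6> repeats <1,0,3,2>;
// <0,4,1,5, 2,6,3,7> does not.
static bool isLaneRepeatedSketch(const std::vector<int> &Mask,
                                 unsigned LaneElts) {
  for (size_t I = 0; I < Mask.size(); ++I) {
    int LaneBase = int((I / LaneElts) * LaneElts);
    if (Mask[I] - LaneBase != Mask[I % LaneElts])
      return false;
  }
  return true;
}
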
@@ -23438,12 +23449,6 @@ static SDValue LowerShift(SDValue Op, const X86Subtarget &Subtarget,
       return R;
   }
 
-  // If possible, lower this packed shift into a vector multiply instead of
-  // expanding it into a sequence of scalar shifts.
-  if (Op.getOpcode() == ISD::SHL)
-    if (SDValue Scale = convertShiftLeftToScale(Amt, dl, Subtarget, DAG))
-      return DAG.getNode(ISD::MUL, dl, VT, R, Scale);
-
   // If possible, lower this shift as a sequence of two shifts by
   // constant plus a BLENDing shuffle instead of scalarizing it.
   // Example:
@@ -23454,7 +23459,8 @@ static SDValue LowerShift(SDValue Op, const X86Subtarget &Subtarget,
   //
   // The advantage is that the two shifts from the example would be
   // lowered as X86ISD::VSRLI nodes in parallel before blending.
-  if (ConstantAmt && (VT == MVT::v8i16 || VT == MVT::v4i32)) {
+  if (ConstantAmt && (VT == MVT::v8i16 || VT == MVT::v4i32 ||
+                      (VT == MVT::v16i16 && Subtarget.hasInt256()))) {
     SDValue Amt1, Amt2;
     unsigned NumElts = VT.getVectorNumElements();
     SmallVector<int, 8> ShuffleMask;
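
The comment's technique, modelled in scalar code for the v4i32 case with shift amounts <X,Y,Y,Y> (illustrative only; the real lowering builds X86ISD::VSRLI nodes plus a blend):

#include <array>
#include <cstdint>

// (srl A, <X,Y,Y,Y>) == take lane 0 from (srl A, X) and lanes 1..3 from
// (srl A, Y); the two uniform shifts can execute in parallel.
static std::array<uint32_t, 4> srlBlendModel(std::array<uint32_t, 4> A,
                                             unsigned X, unsigned Y) {
  std::array<uint32_t, 4> ByX{}, ByY{};
  for (int I = 0; I != 4; ++I) {
    ByX[I] = A[I] >> X;
    ByY[I] = A[I] >> Y;
  }
  ByY[0] = ByX[0]; // MOVSS/blend-style select of lane 0
  return ByY;
}

The hunk above extends this path to v16i16 when AVX2 (hasInt256) is available.
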
@@ -23477,8 +23483,13 @@ static SDValue LowerShift(SDValue Op, const X86Subtarget &Subtarget,
       break;
     }
 
+    // Only perform this blend if we can perform it without loading a mask.
     if (ShuffleMask.size() == NumElts && Amt1 && Amt2 &&
-        isa<ConstantSDNode>(Amt1) && isa<ConstantSDNode>(Amt2)) {
+        isa<ConstantSDNode>(Amt1) && isa<ConstantSDNode>(Amt2) &&
+        (VT != MVT::v16i16 ||
+         is128BitLaneRepeatedShuffleMask(VT, ShuffleMask)) &&
+        (VT == MVT::v4i32 || Subtarget.hasSSE41() ||
+         Op.getOpcode() != ISD::SHL || canWidenShuffleElements(ShuffleMask))) {
       SDValue Splat1 =
           DAG.getConstant(cast<ConstantSDNode>(Amt1)->getAPIntValue(), dl, VT);
       SDValue Shift1 = DAG.getNode(Op->getOpcode(), dl, VT, R, Splat1);
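
A hedged reading of the new guard, decomposed into named flags (the names are illustrative, not from the patch): v16i16 blends use VPBLENDW, whose 8-bit immediate is applied identically to both 128-bit lanes, so the pattern must be lane-repeated; and a pre-SSE4.1 v8i16 left shift only takes the blend when the mask widens to i32 lanes, since otherwise the blend would need a mask load and the vector-multiply lowering below is preferable.

struct BlendCtx {
  bool IsV16i16, IsV4i32, HasSSE41, IsSHL, LaneRepeated, Widenable;
};

// Mirrors the combined condition added in the hunk above.
static bool blendAvoidsMaskLoad(const BlendCtx &C) {
  bool V16i16OK = !C.IsV16i16 || C.LaneRepeated; // per-lane immediate blend
  bool ShlOK = C.IsV4i32 || C.HasSSE41 || !C.IsSHL || C.Widenable;
  return V16i16OK && ShlOK;
}
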
@@ -23489,6 +23500,12 @@ static SDValue LowerShift(SDValue Op, const X86Subtarget &Subtarget,
     }
   }
 
+  // If possible, lower this packed shift into a vector multiply instead of
+  // expanding it into a sequence of scalar shifts.
+  if (Op.getOpcode() == ISD::SHL)
+    if (SDValue Scale = convertShiftLeftToScale(Amt, dl, Subtarget, DAG))
+      return DAG.getNode(ISD::MUL, dl, VT, R, Scale);
+
   // v4i32 Non Uniform Shifts.
   // If the shift amount is constant we can shift each lane using the SSE2
   // immediate shifts, else we need to zero-extend each lane to the lower i64
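
These re-added lines move the vector-multiply lowering after the shift-plus-blend path above, so the blend is tried first. The underlying identity, in scalar form (a sketch of the idea behind convertShiftLeftToScale, not its implementation): a left shift by a per-lane constant equals a multiply by 2^amount per lane, e.g. (shl A, <2,3,4,5>) becomes (mul A, <4,8,16,32>), which maps to a single PMULLW/PMULLD-style vector multiply.

#include <array>
#include <cstdint>

static std::array<uint32_t, 4> shlAsMul(std::array<uint32_t, 4> A,
                                        std::array<uint32_t, 4> Amt) {
  std::array<uint32_t, 4> Out{};
  for (int I = 0; I != 4; ++I)
    Out[I] = A[I] * (uint32_t(1) << Amt[I]); // same result as A[I] << Amt[I]
  return Out;
}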