summaryrefslogtreecommitdiffstats
path: root/llvm/lib/Target/X86/X86ISelLowering.cpp
diff options
context:
space:
mode:
author: Craig Topper <craig.topper@intel.com> 2019-02-23 21:41:42 +0000
committer: Craig Topper <craig.topper@intel.com> 2019-02-23 21:41:42 +0000
commit: be9eeb552678f4aaa7293f43dc4d0881cdf5df3a (patch)
tree: 6ed1495d19b4a4e747068e3daee978b68d47cc2c /llvm/lib/Target/X86/X86ISelLowering.cpp
parent: 10ab78e854f2365aa695464c81946c831c6affbb (diff)
download: bcm5719-llvm-be9eeb552678f4aaa7293f43dc4d0881cdf5df3a.tar.gz
download: bcm5719-llvm-be9eeb552678f4aaa7293f43dc4d0881cdf5df3a.zip
Recommit r354363 "[X86][SSE] Generalize X86ISD::BLENDI support to more value types"
And its follow-ups r354511 and r354640. A follow-up patch will fix the issue that caused it to be reverted. llvm-svn: 354737
Diffstat (limited to 'llvm/lib/Target/X86/X86ISelLowering.cpp')
-rw-r--r-- llvm/lib/Target/X86/X86ISelLowering.cpp | 100
1 files changed, 40 insertions, 60 deletions
diff --git a/llvm/lib/Target/X86/X86ISelLowering.cpp b/llvm/lib/Target/X86/X86ISelLowering.cpp
index aa9fd6bc03a..f90a2478306 100644
--- a/llvm/lib/Target/X86/X86ISelLowering.cpp
+++ b/llvm/lib/Target/X86/X86ISelLowering.cpp
@@ -10482,45 +10482,24 @@ static SDValue lowerShuffleAsBlend(const SDLoc &DL, MVT VT, SDValue V1,
V2 = getZeroVector(VT, Subtarget, DAG, DL);
switch (VT.SimpleTy) {
- case MVT::v2f64:
- case MVT::v4f32:
- case MVT::v4f64:
- case MVT::v8f32:
- return DAG.getNode(X86ISD::BLENDI, DL, VT, V1, V2,
- DAG.getConstant(BlendMask, DL, MVT::i8));
case MVT::v4i64:
case MVT::v8i32:
assert(Subtarget.hasAVX2() && "256-bit integer blends require AVX2!");
LLVM_FALLTHROUGH;
+ case MVT::v4f64:
+ case MVT::v8f32:
+ assert(Subtarget.hasAVX() && "256-bit float blends require AVX!");
+ LLVM_FALLTHROUGH;
+ case MVT::v2f64:
case MVT::v2i64:
+ case MVT::v4f32:
case MVT::v4i32:
- // If we have AVX2 it is faster to use VPBLENDD when the shuffle fits into
- // that instruction.
- if (Subtarget.hasAVX2()) {
- // Scale the blend by the number of 32-bit dwords per element.
- int Scale = VT.getScalarSizeInBits() / 32;
- BlendMask = scaleVectorShuffleBlendMask(BlendMask, Mask.size(), Scale);
- MVT BlendVT = VT.getSizeInBits() > 128 ? MVT::v8i32 : MVT::v4i32;
- V1 = DAG.getBitcast(BlendVT, V1);
- V2 = DAG.getBitcast(BlendVT, V2);
- return DAG.getBitcast(
- VT, DAG.getNode(X86ISD::BLENDI, DL, BlendVT, V1, V2,
- DAG.getConstant(BlendMask, DL, MVT::i8)));
- }
- LLVM_FALLTHROUGH;
- case MVT::v8i16: {
- // For integer shuffles we need to expand the mask and cast the inputs to
- // v8i16s prior to blending.
- int Scale = 8 / VT.getVectorNumElements();
- BlendMask = scaleVectorShuffleBlendMask(BlendMask, Mask.size(), Scale);
- V1 = DAG.getBitcast(MVT::v8i16, V1);
- V2 = DAG.getBitcast(MVT::v8i16, V2);
- return DAG.getBitcast(VT,
- DAG.getNode(X86ISD::BLENDI, DL, MVT::v8i16, V1, V2,
- DAG.getConstant(BlendMask, DL, MVT::i8)));
- }
+ case MVT::v8i16:
+ assert(Subtarget.hasSSE41() && "128-bit blends require SSE41!");
+ return DAG.getNode(X86ISD::BLENDI, DL, VT, V1, V2,
+ DAG.getConstant(BlendMask, DL, MVT::i8));
case MVT::v16i16: {
- assert(Subtarget.hasAVX2() && "256-bit integer blends require AVX2!");
+ assert(Subtarget.hasAVX2() && "v16i16 blends require AVX2!");
SmallVector<int, 8> RepeatedMask;
if (is128BitLaneRepeatedShuffleMask(MVT::v16i16, Mask, RepeatedMask)) {
// We can lower these with PBLENDW which is mirrored across 128-bit lanes.
@@ -10548,10 +10527,11 @@ static SDValue lowerShuffleAsBlend(const SDLoc &DL, MVT VT, SDValue V1,
}
LLVM_FALLTHROUGH;
}
- case MVT::v16i8:
- case MVT::v32i8: {
- assert((VT.is128BitVector() || Subtarget.hasAVX2()) &&
- "256-bit byte-blends require AVX2 support!");
+ case MVT::v32i8:
+ assert(Subtarget.hasAVX2() && "256-bit byte-blends require AVX2!");
+ LLVM_FALLTHROUGH;
+ case MVT::v16i8: {
+ assert(Subtarget.hasSSE41() && "128-bit byte-blends require SSE41!");
// Attempt to lower to a bitmask if we can. VPAND is faster than VPBLENDVB.
if (SDValue Masked = lowerShuffleAsBitMask(DL, VT, V1, V2, Mask, Zeroable,
@@ -31055,34 +31035,11 @@ static bool matchBinaryPermuteShuffle(
return true;
}
} else {
- // Determine a type compatible with X86ISD::BLENDI.
- ShuffleVT = MaskVT;
- if (Subtarget.hasAVX2()) {
- if (ShuffleVT == MVT::v4i64)
- ShuffleVT = MVT::v8i32;
- else if (ShuffleVT == MVT::v2i64)
- ShuffleVT = MVT::v4i32;
- } else {
- if (ShuffleVT == MVT::v2i64 || ShuffleVT == MVT::v4i32)
- ShuffleVT = MVT::v8i16;
- else if (ShuffleVT == MVT::v4i64)
- ShuffleVT = MVT::v4f64;
- else if (ShuffleVT == MVT::v8i32)
- ShuffleVT = MVT::v8f32;
- }
-
- if (!ShuffleVT.isFloatingPoint()) {
- int Scale = EltSizeInBits / ShuffleVT.getScalarSizeInBits();
- BlendMask =
- scaleVectorShuffleBlendMask(BlendMask, NumMaskElts, Scale);
- ShuffleVT = MVT::getIntegerVT(EltSizeInBits / Scale);
- ShuffleVT = MVT::getVectorVT(ShuffleVT, NumMaskElts * Scale);
- }
-
V1 = ForceV1Zero ? getZeroVector(MaskVT, Subtarget, DAG, DL) : V1;
V2 = ForceV2Zero ? getZeroVector(MaskVT, Subtarget, DAG, DL) : V2;
PermuteImm = (unsigned)BlendMask;
Shuffle = X86ISD::BLENDI;
+ ShuffleVT = MaskVT;
return true;
}
}
@@ -32239,6 +32196,29 @@ static SDValue combineTargetShuffle(SDValue N, SelectionDAG &DAG,
return SDValue();
}
+ case X86ISD::BLENDI: {
+ SDValue N0 = N.getOperand(0);
+ SDValue N1 = N.getOperand(1);
+
+ // blend(bitcast(x),bitcast(y)) -> bitcast(blend(x,y)) to narrower types.
+ // TODO: Handle MVT::v16i16 repeated blend mask.
+ if (N0.getOpcode() == ISD::BITCAST && N1.getOpcode() == ISD::BITCAST &&
+ N0.getOperand(0).getValueType() == N1.getOperand(0).getValueType()) {
+ MVT SrcVT = N0.getOperand(0).getSimpleValueType();
+ if ((VT.getScalarSizeInBits() % SrcVT.getScalarSizeInBits()) == 0 &&
+ SrcVT.getScalarSizeInBits() >= 32) {
+ unsigned Mask = N.getConstantOperandVal(2);
+ unsigned Size = VT.getVectorNumElements();
+ unsigned Scale = VT.getScalarSizeInBits() / SrcVT.getScalarSizeInBits();
+ unsigned ScaleMask = scaleVectorShuffleBlendMask(Mask, Size, Scale);
+ return DAG.getBitcast(
+ VT, DAG.getNode(X86ISD::BLENDI, DL, SrcVT, N0.getOperand(0),
+ N1.getOperand(0),
+ DAG.getConstant(ScaleMask, DL, MVT::i8)));
+ }
+ }
+ return SDValue();
+ }
case X86ISD::PSHUFD:
case X86ISD::PSHUFLW:
case X86ISD::PSHUFHW:
OpenPOWER on IntegriCloud