Diffstat (limited to 'llvm/lib')
| -rw-r--r-- | llvm/lib/Target/X86/X86ISelLowering.cpp | 46 |
1 files changed, 44 insertions, 2 deletions
diff --git a/llvm/lib/Target/X86/X86ISelLowering.cpp b/llvm/lib/Target/X86/X86ISelLowering.cpp
index 6ab1daabec2..ea438a882aa 100644
--- a/llvm/lib/Target/X86/X86ISelLowering.cpp
+++ b/llvm/lib/Target/X86/X86ISelLowering.cpp
@@ -27323,12 +27323,54 @@ reduceMaskedLoadToScalarLoad(MaskedLoadSDNode *ML, SelectionDAG &DAG,
   return DCI.CombineTo(ML, Insert, Load.getValue(1), true);
 }
 
+/// Convert a masked load with a constant mask into a masked load and a shuffle.
+/// This allows the blend operation to use a faster kind of shuffle instruction
+/// (for example, vblendvps -> vblendps).
+static SDValue
+combineMaskedLoadConstantMask(MaskedLoadSDNode *ML, SelectionDAG &DAG,
+                              TargetLowering::DAGCombinerInfo &DCI) {
+  // Don't try this if the pass-through operand is already undefined. That would
+  // cause an infinite loop because that's what we're about to create.
+  if (!ISD::isBuildVectorOfConstantSDNodes(ML->getMask().getNode()) ||
+      ML->getSrc0().getOpcode() == ISD::UNDEF)
+    return SDValue();
+
+  // Convert the masked load's mask into a blend mask for a vector shuffle node.
+  EVT VT = ML->getValueType(0);
+  unsigned NumElts = VT.getVectorNumElements();
+  BuildVectorSDNode *MaskBV = cast<BuildVectorSDNode>(ML->getMask());
+  SmallVector<int, 16> ShufMask(NumElts, SM_SentinelUndef);
+  for (unsigned i = 0; i < NumElts; ++i) {
+    // If this mask bit of the masked load is false, the pass-through vector
+    // (Src0) element will be selected for that vector lane.
+    if (MaskBV->getOperand(i).getOpcode() != ISD::UNDEF)
+      ShufMask[i] = isNullConstant(MaskBV->getOperand(i)) ? i + NumElts : i;
+  }
+
+  // The new masked load has an undef pass-through operand. The shuffle uses the
+  // original pass-through operand.
+  SDLoc DL(ML);
+  SDValue NewML = DAG.getMaskedLoad(VT, DL, ML->getChain(), ML->getBasePtr(),
+                                    ML->getMask(), DAG.getUNDEF(VT),
+                                    ML->getMemoryVT(), ML->getMemOperand(),
+                                    ML->getExtensionType());
+  SDValue Blend = DAG.getVectorShuffle(VT, DL, NewML, ML->getSrc0(), ShufMask);
+
+  return DCI.CombineTo(ML, Blend, NewML.getValue(1), true);
+}
+
 static SDValue
 combineMaskedLoad(SDNode *N, SelectionDAG &DAG,
                   TargetLowering::DAGCombinerInfo &DCI,
                   const X86Subtarget &Subtarget) {
   MaskedLoadSDNode *Mld = cast<MaskedLoadSDNode>(N);
-  if (Mld->getExtensionType() == ISD::NON_EXTLOAD)
-    return reduceMaskedLoadToScalarLoad(Mld, DAG, DCI);
+  if (Mld->getExtensionType() == ISD::NON_EXTLOAD) {
+    if (SDValue ScalarLoad = reduceMaskedLoadToScalarLoad(Mld, DAG, DCI))
+      return ScalarLoad;
+    // TODO: Do some AVX512 subsets benefit from this transform?
+    if (!Subtarget.hasAVX512())
+      if (SDValue Blend = combineMaskedLoadConstantMask(Mld, DAG, DCI))
+        return Blend;
+  }
 
   if (Mld->getExtensionType() != ISD::SEXTLOAD)
     return SDValue();
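
The core of the transform is the loop that rewrites the constant load mask into a shuffle (blend) mask. The standalone sketch below is not part of the commit; it mirrors that loop with plain C++ containers, and the helper name buildBlendMask and the std::optional<bool> mask encoding are illustrative only. A true mask bit keeps lane i of the freshly loaded vector, a false bit selects lane i + NumElts of the original pass-through vector, and an undef bit stays at the -1 sentinel (SM_SentinelUndef in the patch).

// Standalone sketch (not part of the commit): mirrors how the patch turns a
// constant masked-load mask into a vector-shuffle blend mask.
#include <cstdio>
#include <optional>
#include <vector>

// nullopt models an undef mask element; true/false model the constant bits.
static std::vector<int> buildBlendMask(const std::vector<std::optional<bool>> &Mask) {
  const int NumElts = static_cast<int>(Mask.size());
  std::vector<int> ShufMask(NumElts, -1); // -1 plays the role of SM_SentinelUndef
  for (int i = 0; i < NumElts; ++i)
    if (Mask[i].has_value())
      // True bit: take lane i of the loaded vector; false bit: take lane
      // i + NumElts of the pass-through (Src0) vector.
      ShufMask[i] = *Mask[i] ? i : i + NumElts;
  return ShufMask;
}

int main() {
  // A v4f32 masked load with mask <true, false, true, false>:
  // lanes 0 and 2 come from memory, lanes 1 and 3 from the pass-through.
  std::vector<std::optional<bool>> Mask = {true, false, true, false};
  for (int Idx : buildBlendMask(Mask))
    std::printf("%d ", Idx); // prints: 0 5 2 7
  std::printf("\n");
  return 0;
}

With a mask of <true, false, true, false>, the resulting blend mask is {0, 5, 2, 7}, which the patch feeds to DAG.getVectorShuffle so the blend can later be matched as a cheaper shuffle such as vblendps rather than vblendvps.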

