Diffstat (limited to 'llvm/lib')
-rw-r--r--  llvm/lib/Target/X86/X86ISelLowering.cpp | 46
1 file changed, 44 insertions(+), 2 deletions(-)
diff --git a/llvm/lib/Target/X86/X86ISelLowering.cpp b/llvm/lib/Target/X86/X86ISelLowering.cpp
index 6ab1daabec2..ea438a882aa 100644
--- a/llvm/lib/Target/X86/X86ISelLowering.cpp
+++ b/llvm/lib/Target/X86/X86ISelLowering.cpp
@@ -27323,12 +27323,54 @@ reduceMaskedLoadToScalarLoad(MaskedLoadSDNode *ML, SelectionDAG &DAG,
   return DCI.CombineTo(ML, Insert, Load.getValue(1), true);
 }
+/// Convert a masked load with a constant mask into a masked load and a shuffle.
+/// This allows the blend operation to use a faster kind of shuffle instruction
+/// (for example, vblendvps -> vblendps).
+static SDValue
+combineMaskedLoadConstantMask(MaskedLoadSDNode *ML, SelectionDAG &DAG,
+                              TargetLowering::DAGCombinerInfo &DCI) {
+  // Require a constant mask. Also bail if the pass-through operand is already
+  // undef; that would cause an infinite loop, as it's what we're about to create.
+  if (!ISD::isBuildVectorOfConstantSDNodes(ML->getMask().getNode()) ||
+      ML->getSrc0().getOpcode() == ISD::UNDEF)
+    return SDValue();
+
+  // Convert the masked load's mask into a blend mask for a vector shuffle node.
+  EVT VT = ML->getValueType(0);
+  unsigned NumElts = VT.getVectorNumElements();
+  BuildVectorSDNode *MaskBV = cast<BuildVectorSDNode>(ML->getMask());
+  SmallVector<int, 16> ShufMask(NumElts, SM_SentinelUndef);
+  for (unsigned i = 0; i < NumElts; ++i) {
+    // If this mask bit of the masked load is false, the pass-through vector
+    // (Src0) element will be selected for that vector lane.
+    if (MaskBV->getOperand(i).getOpcode() != ISD::UNDEF)
+      ShufMask[i] = isNullConstant(MaskBV->getOperand(i)) ? i + NumElts : i;
+  }
+
+  // The new masked load has an undef pass-through operand. The shuffle uses the
+  // original pass-through operand.
+  SDLoc DL(ML);
+  SDValue NewML = DAG.getMaskedLoad(VT, DL, ML->getChain(), ML->getBasePtr(),
+                                    ML->getMask(), DAG.getUNDEF(VT),
+                                    ML->getMemoryVT(), ML->getMemOperand(),
+                                    ML->getExtensionType());
+  SDValue Blend = DAG.getVectorShuffle(VT, DL, NewML, ML->getSrc0(), ShufMask);
+
+  return DCI.CombineTo(ML, Blend, NewML.getValue(1), true);
+}
+
 static SDValue combineMaskedLoad(SDNode *N, SelectionDAG &DAG,
                                  TargetLowering::DAGCombinerInfo &DCI,
                                  const X86Subtarget &Subtarget) {
   MaskedLoadSDNode *Mld = cast<MaskedLoadSDNode>(N);
-  if (Mld->getExtensionType() == ISD::NON_EXTLOAD)
-    return reduceMaskedLoadToScalarLoad(Mld, DAG, DCI);
+  if (Mld->getExtensionType() == ISD::NON_EXTLOAD) {
+    if (SDValue ScalarLoad = reduceMaskedLoadToScalarLoad(Mld, DAG, DCI))
+      return ScalarLoad;
+    // TODO: Do some AVX512 subsets benefit from this transform?
+    if (!Subtarget.hasAVX512())
+      if (SDValue Blend = combineMaskedLoadConstantMask(Mld, DAG, DCI))
+        return Blend;
+  }
   if (Mld->getExtensionType() != ISD::SEXTLOAD)
     return SDValue();
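
To make the blend-mask convention concrete: in a two-operand vector shuffle, mask indices 0..NumElts-1 select from the first operand (here, the new masked load) and NumElts..2*NumElts-1 select from the second (the original pass-through). The following minimal standalone sketch models the ShufMask loop above with plain integers; buildBlendMask is a hypothetical helper, with 1/0/-1 standing in for true/false/undef constant-mask elements and -1 for SM_SentinelUndef in the result.

// Hypothetical standalone model of the ShufMask loop above (not LLVM code).
#include <cstdio>
#include <vector>

// LoadMask[i] is the constant mask element for lane i: 1 selects the loaded
// element, 0 selects the pass-through element, -1 models an undef mask
// element (which leaves the shuffle lane as SM_SentinelUndef / -1).
static std::vector<int> buildBlendMask(const std::vector<int> &LoadMask) {
  const unsigned NumElts = static_cast<unsigned>(LoadMask.size());
  std::vector<int> ShufMask(NumElts, -1);
  for (unsigned i = 0; i < NumElts; ++i) {
    if (LoadMask[i] < 0)
      continue; // undef mask element: leave this shuffle lane undef
    // Indices 0..NumElts-1 pick from the first shuffle operand (the new
    // masked load); NumElts..2*NumElts-1 pick from the second operand
    // (the original pass-through).
    ShufMask[i] = LoadMask[i] == 0 ? static_cast<int>(i + NumElts)
                                   : static_cast<int>(i);
  }
  return ShufMask;
}

int main() {
  // Constant mask <1,0,1,0> on a 4-lane vector: lanes 0 and 2 come from
  // memory, lanes 1 and 3 keep the pass-through element.
  for (int M : buildBlendMask({1, 0, 1, 0}))
    std::printf("%d ", M); // prints: 0 5 2 7
  std::printf("\n");
  return 0;
}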
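
As for profitability, going by the comments in the patch itself: once the blend pattern is a compile-time constant, the backend can emit an immediate-controlled blend such as vblendps instead of a variable blend such as vblendvps, which needs its selector in a vector register. On AVX512 the mask is a native operand of the masked load itself, so the transform is skipped there; the TODO above leaves open whether some AVX512 subsets would still benefit. The per-lane model below is a minimal sketch, assuming plain float vectors, of why the undef pass-through is legal: every lane where the new load's result is undefined is overwritten by the blend.

// Hypothetical per-lane model of the combine (not LLVM code): both functions
// compute the same result for any constant Mask, which is what makes the
// rewrite legal.
#include <cassert>
#include <vector>

// Before: the masked load blends memory against the pass-through itself.
static std::vector<float> maskedLoad(const std::vector<float> &Mem,
                                     const std::vector<int> &Mask,
                                     const std::vector<float> &PassThru) {
  std::vector<float> R(Mem.size());
  for (size_t i = 0; i != R.size(); ++i)
    R[i] = Mask[i] ? Mem[i] : PassThru[i];
  return R;
}

// After: the load uses an undef pass-through (modeled here as 0.0f; any value
// works because the shuffle overwrites those lanes), and a separate blend
// with a compile-time-constant pattern does the selection.
static std::vector<float> loadThenBlend(const std::vector<float> &Mem,
                                        const std::vector<int> &Mask,
                                        const std::vector<float> &PassThru) {
  std::vector<float> Loaded(Mem.size(), 0.0f); // "undef" in the false lanes
  for (size_t i = 0; i != Loaded.size(); ++i)
    if (Mask[i])
      Loaded[i] = Mem[i];
  std::vector<float> R(Mem.size());
  for (size_t i = 0; i != R.size(); ++i) // constant-pattern blend
    R[i] = Mask[i] ? Loaded[i] : PassThru[i];
  return R;
}

int main() {
  std::vector<float> Mem{10, 20, 30, 40}, Pass{1, 2, 3, 4};
  std::vector<int> Mask{1, 0, 1, 0};
  assert(maskedLoad(Mem, Mask, Pass) == loadThenBlend(Mem, Mask, Pass));
  return 0;
}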