summary refs log tree commit diff stats
path: root/llvm/lib
diff options
context:
space:
mode:
Diffstat (limited to 'llvm/lib')
-rw-r--r-- llvm/lib/Target/X86/X86ISelLowering.cpp 43
1 files changed, 43 insertions, 0 deletions
diff --git a/llvm/lib/Target/X86/X86ISelLowering.cpp b/llvm/lib/Target/X86/X86ISelLowering.cpp
index d08c544f335..6fbeb62389c 100644
--- a/llvm/lib/Target/X86/X86ISelLowering.cpp
+++ b/llvm/lib/Target/X86/X86ISelLowering.cpp
@@ -26748,10 +26748,53 @@ static int getOneTrueElt(SDValue V) {
return TrueIndex;
};
+/// If exactly one element of the mask is set for a non-extending masked load,
+/// it is a scalar load and vector insert.
+/// Note: It is expected that the degenerate cases of an all-zeros or all-ones
+/// mask have already been optimized in IR, so we don't bother with those here.
+static SDValue
+reduceMaskedLoadToScalarLoad(MaskedLoadSDNode *ML, SelectionDAG &DAG,
+                             TargetLowering::DAGCombinerInfo &DCI) {
+  // FIXME: Refactor shared/similar logic with reduceMaskedStoreToScalarStore().
+
+  // TODO: This is not x86-specific, so it could be lifted to DAGCombiner.
+  // However, some target hooks may need to be added to know when the transform
+  // is profitable. Endianness would also have to be considered.
+
+  int TrueMaskElt = getOneTrueElt(ML->getMask());
+  if (TrueMaskElt < 0)
+    return SDValue(); // No usable mask element index: leave the node alone.
+
+  SDLoc DL(ML);
+  EVT VT = ML->getValueType(0);
+  EVT EltVT = VT.getVectorElementType();
+
+  // Load the one scalar element selected by the mask. The address and the
+  // pointer info get the same byte offset so the memory operand stays accurate.
+  unsigned Offset = TrueMaskElt * EltVT.getStoreSize();
+  SDValue Addr = ML->getBasePtr();
+  if (Offset != 0)
+    Addr = DAG.getMemBasePlusOffset(Addr, Offset, DL);
+  unsigned Alignment = MinAlign(ML->getAlignment(), EltVT.getStoreSize());
+  SDValue Load = DAG.getLoad(EltVT, DL, ML->getChain(), Addr,
+                             ML->getPointerInfo().getWithOffset(Offset),
+                             ML->isVolatile(), ML->isNonTemporal(),
+                             ML->isInvariant(), Alignment);
+
+  // Insert the loaded element into the appropriate place in the vector.
+  SDValue InsertIndex = DAG.getIntPtrConstant(TrueMaskElt, DL);
+  SDValue Insert = DAG.getNode(ISD::INSERT_VECTOR_ELT, DL, VT, ML->getSrc0(),
+                               Load, InsertIndex);
+  return DCI.CombineTo(ML, Insert, Load.getValue(1), true);
+}
+
static SDValue PerformMLOADCombine(SDNode *N, SelectionDAG &DAG,
TargetLowering::DAGCombinerInfo &DCI,
const X86Subtarget &Subtarget) {
MaskedLoadSDNode *Mld = cast<MaskedLoadSDNode>(N);
+ if (Mld->getExtensionType() == ISD::NON_EXTLOAD)
+ return reduceMaskedLoadToScalarLoad(Mld, DAG, DCI);
+
if (Mld->getExtensionType() != ISD::SEXTLOAD)
return SDValue();
OpenPOWER on IntegriCloud