summaryrefslogtreecommitdiffstats
path: root/llvm/lib/Target/X86/X86ISelLowering.cpp
diff options
context:
space:
mode:
Diffstat (limited to 'llvm/lib/Target/X86/X86ISelLowering.cpp')
-rw-r--r--llvm/lib/Target/X86/X86ISelLowering.cpp115
1 files changed, 106 insertions, 9 deletions
diff --git a/llvm/lib/Target/X86/X86ISelLowering.cpp b/llvm/lib/Target/X86/X86ISelLowering.cpp
index f79f7b70a9d..58398df2059 100644
--- a/llvm/lib/Target/X86/X86ISelLowering.cpp
+++ b/llvm/lib/Target/X86/X86ISelLowering.cpp
@@ -6130,6 +6130,37 @@ static bool getTargetConstantBitsFromNode(SDValue Op, unsigned EltSizeInBits,
}
}
+ if (Op.getOpcode() == X86ISD::VBROADCAST_LOAD &&
+ EltSizeInBits <= VT.getScalarSizeInBits()) {
+ auto *MemIntr = cast<MemIntrinsicSDNode>(Op);
+ if (MemIntr->getMemoryVT().getScalarSizeInBits() != VT.getScalarSizeInBits())
+ return false;
+
+ SDValue Ptr = MemIntr->getBasePtr();
+ if (Ptr->getOpcode() == X86ISD::Wrapper ||
+ Ptr->getOpcode() == X86ISD::WrapperRIP)
+ Ptr = Ptr->getOperand(0);
+
+ auto *CNode = dyn_cast<ConstantPoolSDNode>(Ptr);
+ if (!CNode || CNode->isMachineConstantPoolEntry() ||
+ CNode->getOffset() != 0)
+ return false;
+
+ if (const Constant *C = CNode->getConstVal()) {
+ unsigned SrcEltSizeInBits = C->getType()->getScalarSizeInBits();
+ unsigned NumSrcElts = SizeInBits / SrcEltSizeInBits;
+
+ APInt UndefSrcElts(NumSrcElts, 0);
+ SmallVector<APInt, 64> SrcEltBits(1, APInt(SrcEltSizeInBits, 0));
+ if (CollectConstantBits(C, SrcEltBits[0], UndefSrcElts, 0)) {
+ if (UndefSrcElts[0])
+ UndefSrcElts.setBits(0, NumSrcElts);
+ SrcEltBits.append(NumSrcElts - 1, SrcEltBits[0]);
+ return CastBitData(UndefSrcElts, SrcEltBits);
+ }
+ }
+ }
+
// Extract constant bits from a subvector broadcast.
if (Op.getOpcode() == X86ISD::SUBV_BROADCAST) {
SmallVector<APInt, 16> SubEltBits;
@@ -28582,6 +28613,7 @@ const char *X86TargetLowering::getTargetNodeName(unsigned Opcode) const {
case X86ISD::UNPCKL: return "X86ISD::UNPCKL";
case X86ISD::UNPCKH: return "X86ISD::UNPCKH";
case X86ISD::VBROADCAST: return "X86ISD::VBROADCAST";
+ case X86ISD::VBROADCAST_LOAD: return "X86ISD::VBROADCAST_LOAD";
case X86ISD::VBROADCASTM: return "X86ISD::VBROADCASTM";
case X86ISD::SUBV_BROADCAST: return "X86ISD::SUBV_BROADCAST";
case X86ISD::VPERMILPV: return "X86ISD::VPERMILPV";
@@ -33347,6 +33379,19 @@ static SDValue combineTargetShuffle(SDValue N, SelectionDAG &DAG,
if (Src.getOpcode() == ISD::SCALAR_TO_VECTOR)
return DAG.getNode(X86ISD::VBROADCAST, DL, VT, Src.getOperand(0));
+ // vbroadcast(scalarload X) -> vbroadcast_load X
+ if (!SrcVT.isVector() && Src.hasOneUse() &&
+ ISD::isNormalLoad(Src.getNode())) {
+ LoadSDNode *LN = cast<LoadSDNode>(Src);
+ SDVTList Tys = DAG.getVTList(VT, MVT::Other);
+ SDValue Ops[] = { LN->getChain(), LN->getBasePtr() };
+ SDValue BcastLd =
+ DAG.getMemIntrinsicNode(X86ISD::VBROADCAST_LOAD, DL, Tys, Ops,
+ LN->getMemoryVT(), LN->getMemOperand());
+ DAG.ReplaceAllUsesOfValueWith(SDValue(LN, 1), BcastLd.getValue(1));
+ return BcastLd;
+ }
+
// Share broadcast with the longest vector and extract low subvector (free).
for (SDNode *User : Src->uses())
if (User != N.getNode() && User->getOpcode() == X86ISD::VBROADCAST &&
@@ -33512,17 +33557,23 @@ static SDValue combineTargetShuffle(SDValue N, SelectionDAG &DAG,
DAG.getTargetConstant(InsertPSMask, DL, MVT::i8));
}
- // If we're inserting an element from a vbroadcast of a load, fold the
+ // If we're inserting an element from a vbroadcast load, fold the
// load into the X86insertps instruction. We need to convert the scalar
// load to a vector and clear the source lane of the INSERTPS control.
- if (Op1.getOpcode() == X86ISD::VBROADCAST && Op1.hasOneUse() &&
- Op1.getOperand(0).hasOneUse() &&
- !Op1.getOperand(0).getValueType().isVector() &&
- ISD::isNormalLoad(Op1.getOperand(0).getNode()))
- return DAG.getNode(X86ISD::INSERTPS, DL, VT, Op0,
- DAG.getNode(ISD::SCALAR_TO_VECTOR, DL, VT,
- Op1.getOperand(0)),
- DAG.getTargetConstant(InsertPSMask & 0x3f, DL, MVT::i8));
+ if (Op1.getOpcode() == X86ISD::VBROADCAST_LOAD && Op1.hasOneUse()) {
+ auto *MemIntr = cast<MemIntrinsicSDNode>(Op1);
+ if (MemIntr->getMemoryVT().getScalarSizeInBits() == 32) {
+ SDValue Load = DAG.getLoad(MVT::f32, DL, MemIntr->getChain(),
+ MemIntr->getBasePtr(),
+ MemIntr->getMemOperand());
+ SDValue Insert = DAG.getNode(X86ISD::INSERTPS, DL, VT, Op0,
+ DAG.getNode(ISD::SCALAR_TO_VECTOR, DL, VT,
+ Load),
+ DAG.getTargetConstant(InsertPSMask & 0x3f, DL, MVT::i8));
+ DAG.ReplaceAllUsesOfValueWith(SDValue(MemIntr, 1), Load.getValue(1));
+ return Insert;
+ }
+ }
return SDValue();
}
@@ -35851,6 +35902,23 @@ static SDValue combineExtractWithShuffle(SDNode *N, SelectionDAG &DAG,
return DAG.getBitcast(VT, SrcOp);
}
+ // If we're extracting a single element from a broadcast load and there are
+ // no other users, just create a single load.
+ if (SrcBC.getOpcode() == X86ISD::VBROADCAST_LOAD && SrcBC.hasOneUse()) {
+ auto *MemIntr = cast<MemIntrinsicSDNode>(SrcBC);
+ unsigned SrcBCWidth = SrcBC.getScalarValueSizeInBits();
+ if (MemIntr->getMemoryVT().getSizeInBits() == SrcBCWidth &&
+ VT.getSizeInBits() == SrcBCWidth) {
+ SDValue Load = DAG.getLoad(VT, dl, MemIntr->getChain(),
+ MemIntr->getBasePtr(),
+ MemIntr->getPointerInfo(),
+ MemIntr->getAlignment(),
+ MemIntr->getMemOperand()->getFlags());
+ DAG.ReplaceAllUsesOfValueWith(SDValue(MemIntr, 1), Load.getValue(1));
+ return Load;
+ }
+ }
+
// Handle extract(truncate(x)) for 0'th index.
// TODO: Treat this as a faux shuffle?
// TODO: When can we use this for general indices?
@@ -43893,6 +43961,21 @@ static SDValue combineInsertSubvector(SDNode *N, SelectionDAG &DAG,
if (Vec.isUndef() && IdxVal != 0 && SubVec.getOpcode() == X86ISD::VBROADCAST)
return DAG.getNode(X86ISD::VBROADCAST, dl, OpVT, SubVec.getOperand(0));
+ // If this is a broadcast load inserted into an upper undef, use a larger
+ // broadcast load.
+ if (Vec.isUndef() && IdxVal != 0 && SubVec.hasOneUse() &&
+ SubVec.getOpcode() == X86ISD::VBROADCAST_LOAD) {
+ auto *MemIntr = cast<MemIntrinsicSDNode>(SubVec);
+ SDVTList Tys = DAG.getVTList(OpVT, MVT::Other);
+ SDValue Ops[] = { MemIntr->getChain(), MemIntr->getBasePtr() };
+ SDValue BcastLd =
+ DAG.getMemIntrinsicNode(X86ISD::VBROADCAST_LOAD, dl, Tys, Ops,
+ MemIntr->getMemoryVT(),
+ MemIntr->getMemOperand());
+ DAG.ReplaceAllUsesOfValueWith(SDValue(MemIntr, 1), BcastLd.getValue(1));
+ return BcastLd;
+ }
+
return SDValue();
}
@@ -44065,6 +44148,20 @@ static SDValue combineExtractSubvector(SDNode *N, SelectionDAG &DAG,
InVec.getOperand(0).getValueSizeInBits() <= VT.getSizeInBits())
return DAG.getNode(X86ISD::VBROADCAST, SDLoc(N), VT, InVec.getOperand(0));
+ if (InVec.getOpcode() == X86ISD::VBROADCAST_LOAD && InVec.hasOneUse()) {
+ auto *MemIntr = cast<MemIntrinsicSDNode>(InVec);
+ if (MemIntr->getMemoryVT().getSizeInBits() <= VT.getSizeInBits()) {
+ SDVTList Tys = DAG.getVTList(VT, MVT::Other);
+ SDValue Ops[] = { MemIntr->getChain(), MemIntr->getBasePtr() };
+ SDValue BcastLd =
+ DAG.getMemIntrinsicNode(X86ISD::VBROADCAST_LOAD, SDLoc(N), Tys, Ops,
+ MemIntr->getMemoryVT(),
+ MemIntr->getMemOperand());
+ DAG.ReplaceAllUsesOfValueWith(SDValue(MemIntr, 1), BcastLd.getValue(1));
+ return BcastLd;
+ }
+ }
+
// If we're extracting the lowest subvector and we're the only user,
// we may be able to perform this with a smaller vector width.
if (IdxVal == 0 && InVec.hasOneUse()) {
OpenPOWER on IntegriCloud