diff options
Diffstat (limited to 'llvm/lib/Target/X86/X86ISelLowering.cpp')
| -rw-r--r-- | llvm/lib/Target/X86/X86ISelLowering.cpp | 115 |
1 files changed, 106 insertions, 9 deletions
diff --git a/llvm/lib/Target/X86/X86ISelLowering.cpp b/llvm/lib/Target/X86/X86ISelLowering.cpp index f79f7b70a9d..58398df2059 100644 --- a/llvm/lib/Target/X86/X86ISelLowering.cpp +++ b/llvm/lib/Target/X86/X86ISelLowering.cpp @@ -6130,6 +6130,37 @@ static bool getTargetConstantBitsFromNode(SDValue Op, unsigned EltSizeInBits, } } + if (Op.getOpcode() == X86ISD::VBROADCAST_LOAD && + EltSizeInBits <= VT.getScalarSizeInBits()) { + auto *MemIntr = cast<MemIntrinsicSDNode>(Op); + if (MemIntr->getMemoryVT().getScalarSizeInBits() != VT.getScalarSizeInBits()) + return false; + + SDValue Ptr = MemIntr->getBasePtr(); + if (Ptr->getOpcode() == X86ISD::Wrapper || + Ptr->getOpcode() == X86ISD::WrapperRIP) + Ptr = Ptr->getOperand(0); + + auto *CNode = dyn_cast<ConstantPoolSDNode>(Ptr); + if (!CNode || CNode->isMachineConstantPoolEntry() || + CNode->getOffset() != 0) + return false; + + if (const Constant *C = CNode->getConstVal()) { + unsigned SrcEltSizeInBits = C->getType()->getScalarSizeInBits(); + unsigned NumSrcElts = SizeInBits / SrcEltSizeInBits; + + APInt UndefSrcElts(NumSrcElts, 0); + SmallVector<APInt, 64> SrcEltBits(1, APInt(SrcEltSizeInBits, 0)); + if (CollectConstantBits(C, SrcEltBits[0], UndefSrcElts, 0)) { + if (UndefSrcElts[0]) + UndefSrcElts.setBits(0, NumSrcElts); + SrcEltBits.append(NumSrcElts - 1, SrcEltBits[0]); + return CastBitData(UndefSrcElts, SrcEltBits); + } + } + } + // Extract constant bits from a subvector broadcast. if (Op.getOpcode() == X86ISD::SUBV_BROADCAST) { SmallVector<APInt, 16> SubEltBits; @@ -28582,6 +28613,7 @@ const char *X86TargetLowering::getTargetNodeName(unsigned Opcode) const { case X86ISD::UNPCKL: return "X86ISD::UNPCKL"; case X86ISD::UNPCKH: return "X86ISD::UNPCKH"; case X86ISD::VBROADCAST: return "X86ISD::VBROADCAST"; + case X86ISD::VBROADCAST_LOAD: return "X86ISD::VBROADCAST_LOAD"; case X86ISD::VBROADCASTM: return "X86ISD::VBROADCASTM"; case X86ISD::SUBV_BROADCAST: return "X86ISD::SUBV_BROADCAST"; case X86ISD::VPERMILPV: return "X86ISD::VPERMILPV"; @@ -33347,6 +33379,19 @@ static SDValue combineTargetShuffle(SDValue N, SelectionDAG &DAG, if (Src.getOpcode() == ISD::SCALAR_TO_VECTOR) return DAG.getNode(X86ISD::VBROADCAST, DL, VT, Src.getOperand(0)); + // vbroadcast(scalarload X) -> vbroadcast_load X + if (!SrcVT.isVector() && Src.hasOneUse() && + ISD::isNormalLoad(Src.getNode())) { + LoadSDNode *LN = cast<LoadSDNode>(Src); + SDVTList Tys = DAG.getVTList(VT, MVT::Other); + SDValue Ops[] = { LN->getChain(), LN->getBasePtr() }; + SDValue BcastLd = + DAG.getMemIntrinsicNode(X86ISD::VBROADCAST_LOAD, DL, Tys, Ops, + LN->getMemoryVT(), LN->getMemOperand()); + DAG.ReplaceAllUsesOfValueWith(SDValue(LN, 1), BcastLd.getValue(1)); + return BcastLd; + } + // Share broadcast with the longest vector and extract low subvector (free). for (SDNode *User : Src->uses()) if (User != N.getNode() && User->getOpcode() == X86ISD::VBROADCAST && @@ -33512,17 +33557,23 @@ static SDValue combineTargetShuffle(SDValue N, SelectionDAG &DAG, DAG.getTargetConstant(InsertPSMask, DL, MVT::i8)); } - // If we're inserting an element from a vbroadcast of a load, fold the + // If we're inserting an element from a vbroadcast load, fold the // load into the X86insertps instruction. We need to convert the scalar // load to a vector and clear the source lane of the INSERTPS control. - if (Op1.getOpcode() == X86ISD::VBROADCAST && Op1.hasOneUse() && - Op1.getOperand(0).hasOneUse() && - !Op1.getOperand(0).getValueType().isVector() && - ISD::isNormalLoad(Op1.getOperand(0).getNode())) - return DAG.getNode(X86ISD::INSERTPS, DL, VT, Op0, - DAG.getNode(ISD::SCALAR_TO_VECTOR, DL, VT, - Op1.getOperand(0)), - DAG.getTargetConstant(InsertPSMask & 0x3f, DL, MVT::i8)); + if (Op1.getOpcode() == X86ISD::VBROADCAST_LOAD && Op1.hasOneUse()) { + auto *MemIntr = cast<MemIntrinsicSDNode>(Op1); + if (MemIntr->getMemoryVT().getScalarSizeInBits() == 32) { + SDValue Load = DAG.getLoad(MVT::f32, DL, MemIntr->getChain(), + MemIntr->getBasePtr(), + MemIntr->getMemOperand()); + SDValue Insert = DAG.getNode(X86ISD::INSERTPS, DL, VT, Op0, + DAG.getNode(ISD::SCALAR_TO_VECTOR, DL, VT, + Load), + DAG.getTargetConstant(InsertPSMask & 0x3f, DL, MVT::i8)); + DAG.ReplaceAllUsesOfValueWith(SDValue(MemIntr, 1), Load.getValue(1)); + return Insert; + } + } return SDValue(); } @@ -35851,6 +35902,23 @@ static SDValue combineExtractWithShuffle(SDNode *N, SelectionDAG &DAG, return DAG.getBitcast(VT, SrcOp); } + // If we're extracting a single element from a broadcast load and there are + // no other users, just create a single load. + if (SrcBC.getOpcode() == X86ISD::VBROADCAST_LOAD && SrcBC.hasOneUse()) { + auto *MemIntr = cast<MemIntrinsicSDNode>(SrcBC); + unsigned SrcBCWidth = SrcBC.getScalarValueSizeInBits(); + if (MemIntr->getMemoryVT().getSizeInBits() == SrcBCWidth && + VT.getSizeInBits() == SrcBCWidth) { + SDValue Load = DAG.getLoad(VT, dl, MemIntr->getChain(), + MemIntr->getBasePtr(), + MemIntr->getPointerInfo(), + MemIntr->getAlignment(), + MemIntr->getMemOperand()->getFlags()); + DAG.ReplaceAllUsesOfValueWith(SDValue(MemIntr, 1), Load.getValue(1)); + return Load; + } + } + // Handle extract(truncate(x)) for 0'th index. // TODO: Treat this as a faux shuffle? // TODO: When can we use this for general indices? @@ -43893,6 +43961,21 @@ static SDValue combineInsertSubvector(SDNode *N, SelectionDAG &DAG, if (Vec.isUndef() && IdxVal != 0 && SubVec.getOpcode() == X86ISD::VBROADCAST) return DAG.getNode(X86ISD::VBROADCAST, dl, OpVT, SubVec.getOperand(0)); + // If this is a broadcast load inserted into an upper undef, use a larger + // broadcast load. + if (Vec.isUndef() && IdxVal != 0 && SubVec.hasOneUse() && + SubVec.getOpcode() == X86ISD::VBROADCAST_LOAD) { + auto *MemIntr = cast<MemIntrinsicSDNode>(SubVec); + SDVTList Tys = DAG.getVTList(OpVT, MVT::Other); + SDValue Ops[] = { MemIntr->getChain(), MemIntr->getBasePtr() }; + SDValue BcastLd = + DAG.getMemIntrinsicNode(X86ISD::VBROADCAST_LOAD, dl, Tys, Ops, + MemIntr->getMemoryVT(), + MemIntr->getMemOperand()); + DAG.ReplaceAllUsesOfValueWith(SDValue(MemIntr, 1), BcastLd.getValue(1)); + return BcastLd; + } + return SDValue(); } @@ -44065,6 +44148,20 @@ static SDValue combineExtractSubvector(SDNode *N, SelectionDAG &DAG, InVec.getOperand(0).getValueSizeInBits() <= VT.getSizeInBits()) return DAG.getNode(X86ISD::VBROADCAST, SDLoc(N), VT, InVec.getOperand(0)); + if (InVec.getOpcode() == X86ISD::VBROADCAST_LOAD && InVec.hasOneUse()) { + auto *MemIntr = cast<MemIntrinsicSDNode>(InVec); + if (MemIntr->getMemoryVT().getSizeInBits() <= VT.getSizeInBits()) { + SDVTList Tys = DAG.getVTList(VT, MVT::Other); + SDValue Ops[] = { MemIntr->getChain(), MemIntr->getBasePtr() }; + SDValue BcastLd = + DAG.getMemIntrinsicNode(X86ISD::VBROADCAST_LOAD, SDLoc(N), Tys, Ops, + MemIntr->getMemoryVT(), + MemIntr->getMemOperand()); + DAG.ReplaceAllUsesOfValueWith(SDValue(MemIntr, 1), BcastLd.getValue(1)); + return BcastLd; + } + } + // If we're extracting the lowest subvector and we're the only user, // we may be able to perform this with a smaller vector width. if (IdxVal == 0 && InVec.hasOneUse()) { |

