diff options
Diffstat (limited to 'llvm/lib/Target/X86/X86ISelLowering.cpp')
| -rw-r--r-- | llvm/lib/Target/X86/X86ISelLowering.cpp | 69 |
1 files changed, 64 insertions, 5 deletions
diff --git a/llvm/lib/Target/X86/X86ISelLowering.cpp b/llvm/lib/Target/X86/X86ISelLowering.cpp index 18f32768a06..e8b3f3656b6 100644 --- a/llvm/lib/Target/X86/X86ISelLowering.cpp +++ b/llvm/lib/Target/X86/X86ISelLowering.cpp @@ -1177,6 +1177,19 @@ X86TargetLowering::X86TargetLowering(const X86TargetMachine &TM, setOperationAction(ISD::FP_TO_SINT, MVT::v2i1, Custom); setOperationAction(ISD::FP_TO_UINT, MVT::v2i1, Custom); + // There is no byte sized k-register load or store without AVX512DQ. + if (!Subtarget.hasDQI()) { + setOperationAction(ISD::LOAD, MVT::v1i1, Custom); + setOperationAction(ISD::LOAD, MVT::v2i1, Custom); + setOperationAction(ISD::LOAD, MVT::v4i1, Custom); + setOperationAction(ISD::LOAD, MVT::v8i1, Custom); + + setOperationAction(ISD::STORE, MVT::v1i1, Custom); + setOperationAction(ISD::STORE, MVT::v2i1, Custom); + setOperationAction(ISD::STORE, MVT::v4i1, Custom); + setOperationAction(ISD::STORE, MVT::v8i1, Custom); + } + // Extends of v16i1/v8i1/v4i1/v2i1 to 128-bit vectors. for (auto VT : { MVT::v16i8, MVT::v8i16, MVT::v4i32, MVT::v2i64 }) { setOperationAction(ISD::SIGN_EXTEND, VT, Custom); @@ -18983,6 +18996,30 @@ static SDValue LowerSIGN_EXTEND(SDValue Op, const X86Subtarget &Subtarget, return DAG.getNode(ISD::CONCAT_VECTORS, dl, VT, OpLo, OpHi); } +static SDValue LowerStore(SDValue Op, const X86Subtarget &Subtarget, + SelectionDAG &DAG) { + StoreSDNode *St = cast<StoreSDNode>(Op.getNode()); + EVT VT = St->getValue().getValueType(); + SDLoc dl(St); + SDValue StoredVal = St->getOperand(1); + + // Without AVX512DQ, we need to use a scalar type for v2i1/v4i1/v8i1 loads. + assert(VT.isVector() && VT.getVectorElementType() == MVT::i1 && + VT.getVectorNumElements() <= 8 && "Unexpected VT"); + assert(!St->isTruncatingStore() && "Expected non-truncating store"); + assert(Subtarget.hasAVX512() && !Subtarget.hasDQI() && + "Expected AVX512F without AVX512DQI"); + + StoredVal = DAG.getNode(ISD::INSERT_SUBVECTOR, dl, MVT::v8i1, + DAG.getUNDEF(MVT::v8i1), StoredVal, + DAG.getIntPtrConstant(0, dl)); + StoredVal = DAG.getBitcast(MVT::i8, StoredVal); + + return DAG.getStore(St->getChain(), dl, StoredVal, St->getBasePtr(), + St->getPointerInfo(), St->getAlignment(), + St->getMemOperand()->getFlags()); +} + // Lower vector extended loads using a shuffle. If SSSE3 is not available we // may emit an illegal shuffle but the expansion is still better than scalar // code. We generate X86ISD::VSEXT for SEXTLOADs if it's available, otherwise @@ -18990,20 +19027,41 @@ static SDValue LowerSIGN_EXTEND(SDValue Op, const X86Subtarget &Subtarget, // FIXME: Is the expansion actually better than scalar code? It doesn't seem so. // TODO: It is possible to support ZExt by zeroing the undef values during // the shuffle phase or after the shuffle. -static SDValue LowerExtendedLoad(SDValue Op, const X86Subtarget &Subtarget, +static SDValue LowerLoad(SDValue Op, const X86Subtarget &Subtarget, SelectionDAG &DAG) { MVT RegVT = Op.getSimpleValueType(); assert(RegVT.isVector() && "We only custom lower vector sext loads."); assert(RegVT.isInteger() && "We only custom lower integer vector sext loads."); - // Nothing useful we can do without SSE2 shuffles. - assert(Subtarget.hasSSE2() && "We only custom lower sext loads with SSE2."); - LoadSDNode *Ld = cast<LoadSDNode>(Op.getNode()); SDLoc dl(Ld); EVT MemVT = Ld->getMemoryVT(); + // Without AVX512DQ, we need to use a scalar type for v2i1/v4i1/v8i1 loads. + if (RegVT.isVector() && RegVT.getVectorElementType() == MVT::i1) { + assert(EVT(RegVT) == MemVT && "Expected non-extending load"); + assert(RegVT.getVectorNumElements() <= 8 && "Unexpected VT"); + assert(Subtarget.hasAVX512() && !Subtarget.hasDQI() && + "Expected AVX512F without AVX512DQI"); + + SDValue NewLd = DAG.getLoad(MVT::i8, dl, Ld->getChain(), Ld->getBasePtr(), + Ld->getPointerInfo(), Ld->getAlignment(), + Ld->getMemOperand()->getFlags()); + + // Replace chain users with the new chain. + assert(NewLd->getNumValues() == 2 && "Loads must carry a chain!"); + DAG.ReplaceAllUsesOfValueWith(SDValue(Ld, 1), NewLd.getValue(1)); + + SDValue Extract = DAG.getNode(ISD::EXTRACT_SUBVECTOR, dl, RegVT, + DAG.getBitcast(MVT::v8i1, NewLd), + DAG.getIntPtrConstant(0, dl)); + return DAG.getMergeValues({Extract, NewLd.getValue(1)}, dl); + } + + // Nothing useful we can do without SSE2 shuffles. + assert(Subtarget.hasSSE2() && "We only custom lower sext loads with SSE2."); + const TargetLowering &TLI = DAG.getTargetLoweringInfo(); unsigned RegSz = RegVT.getSizeInBits(); @@ -24766,7 +24824,8 @@ SDValue X86TargetLowering::LowerOperation(SDValue Op, SelectionDAG &DAG) const { case ISD::FP_TO_SINT: case ISD::FP_TO_UINT: return LowerFP_TO_INT(Op, DAG); case ISD::FP_EXTEND: return LowerFP_EXTEND(Op, DAG); - case ISD::LOAD: return LowerExtendedLoad(Op, Subtarget, DAG); + case ISD::LOAD: return LowerLoad(Op, Subtarget, DAG); + case ISD::STORE: return LowerStore(Op, Subtarget, DAG); case ISD::FABS: case ISD::FNEG: return LowerFABSorFNEG(Op, DAG); case ISD::FCOPYSIGN: return LowerFCOPYSIGN(Op, DAG); |

