diff options
| author | Elena Demikhovsky <elena.demikhovsky@intel.com> | 2016-04-04 07:17:47 +0000 |
|---|---|---|
| committer | Elena Demikhovsky <elena.demikhovsky@intel.com> | 2016-04-04 07:17:47 +0000 |
| commit | e99c5613915db76eb47a04544aca430d86a69368 (patch) | |
| tree | 938508613e516f36a9b487262405e0fcc6a1b0f1 /llvm/lib/Target/X86 | |
| parent | 8e65f8ddfd88fa6b88152ee914b36a67c051f610 (diff) | |
| download | bcm5719-llvm-e99c5613915db76eb47a04544aca430d86a69368.tar.gz bcm5719-llvm-e99c5613915db76eb47a04544aca430d86a69368.zip | |
AVX-512: Truncating store for i1 vectors
Implemented truncstore for KNL and skylake-avx512.
Covers vectors from v2i1 to v64i1. Each element is stored as a single bit (not as a byte), so a v32i1 vector occupies 4 bytes in memory.
Differential Revision: http://reviews.llvm.org/D18740
llvm-svn: 265283
Diffstat (limited to 'llvm/lib/Target/X86')
| -rw-r--r-- | llvm/lib/Target/X86/X86ISelLowering.cpp | 63 |
1 files changed, 62 insertions, 1 deletions
```diff
diff --git a/llvm/lib/Target/X86/X86ISelLowering.cpp b/llvm/lib/Target/X86/X86ISelLowering.cpp
index 896533ce3fb..8c17a47674d 100644
--- a/llvm/lib/Target/X86/X86ISelLowering.cpp
+++ b/llvm/lib/Target/X86/X86ISelLowering.cpp
@@ -1394,6 +1394,7 @@ X86TargetLowering::X86TargetLowering(const X86TargetMachine &TM,
       setLoadExtAction(ISD::SEXTLOAD, VT, MaskVT, Custom);
       setLoadExtAction(ISD::ZEXTLOAD, VT, MaskVT, Custom);
       setLoadExtAction(ISD::EXTLOAD, VT, MaskVT, Custom);
+      setTruncStoreAction(VT, MaskVT, Custom);
     }
     setOperationAction(ISD::FADD, MVT::v16f32, Legal);
     setOperationAction(ISD::FSUB, MVT::v16f32, Legal);
@@ -16106,6 +16107,65 @@ static SDValue LowerSIGN_EXTEND(SDValue Op, const X86Subtarget &Subtarget,
   return DAG.getNode(ISD::CONCAT_VECTORS, dl, VT, OpLo, OpHi);
 }
 
+// Lower truncating store. We need a special lowering to vXi1 vectors
+static SDValue LowerTruncatingStore(SDValue StOp, const X86Subtarget &Subtarget,
+                                    SelectionDAG &DAG) {
+  StoreSDNode *St = cast<StoreSDNode>(StOp.getNode());
+  SDLoc dl(St);
+  EVT MemVT = St->getMemoryVT();
+  assert(St->isTruncatingStore() && "We only custom truncating store.");
+  assert(MemVT.isVector() && MemVT.getVectorElementType() == MVT::i1 &&
+         "Expected truncstore of i1 vector");
+
+  SDValue Op = St->getValue();
+  MVT OpVT = Op.getValueType().getSimpleVT();
+  unsigned NumElts = OpVT.getVectorNumElements();
+  if ((Subtarget.hasVLX() && Subtarget.hasBWI() && Subtarget.hasDQI()) ||
+      NumElts == 16) {
+    // Truncate and store - everything is legal
+    Op = DAG.getNode(ISD::TRUNCATE, dl, MemVT, Op);
+    if (MemVT.getSizeInBits() < 8)
+      Op = DAG.getNode(ISD::INSERT_SUBVECTOR, dl, MVT::v8i1,
+                       DAG.getUNDEF(MVT::v8i1), Op,
+                       DAG.getIntPtrConstant(0, dl));
+    return DAG.getStore(St->getChain(), dl, Op, St->getBasePtr(),
+                        St->getMemOperand());
+  }
+
+  // A subset, assume that we have only AVX-512F
+  if (NumElts <= 8) {
+    if (NumElts < 8) {
+      // Extend to 8-elts vector
+      MVT ExtVT = MVT::getVectorVT(OpVT.getScalarType(), 8);
+      Op = DAG.getNode(ISD::INSERT_SUBVECTOR, dl, ExtVT,
+                       DAG.getUNDEF(ExtVT), Op, DAG.getIntPtrConstant(0, dl));
+    }
+    Op = DAG.getNode(ISD::TRUNCATE, dl, MVT::v8i1, Op);
+    return DAG.getStore(St->getChain(), dl, Op, St->getBasePtr(),
+                        St->getMemOperand());
+  }
+  // v32i8
+  assert(OpVT == MVT::v32i8 && "Unexpected operand type");
+  // Divide the vector into 2 parts and store each part separately
+  SDValue Lo = DAG.getNode(ISD::EXTRACT_SUBVECTOR, dl, MVT::v16i8, Op,
+                           DAG.getIntPtrConstant(0, dl));
+  Lo = DAG.getNode(ISD::TRUNCATE, dl, MVT::v16i1, Lo);
+  SDValue BasePtr = St->getBasePtr();
+  SDValue StLo = DAG.getStore(St->getChain(), dl, Lo, BasePtr,
+                              St->getMemOperand());
+  SDValue Hi = DAG.getNode(ISD::EXTRACT_SUBVECTOR, dl, MVT::v16i8, Op,
+                           DAG.getIntPtrConstant(16, dl));
+  Hi = DAG.getNode(ISD::TRUNCATE, dl, MVT::v16i1, Hi);
+
+  SDValue BasePtrHi =
+      DAG.getNode(ISD::ADD, dl, BasePtr.getValueType(), BasePtr,
+                  DAG.getConstant(2, dl, BasePtr.getValueType()));
+
+  SDValue StHi = DAG.getStore(St->getChain(), dl, Hi,
+                              BasePtrHi, St->getMemOperand());
+  return DAG.getNode(ISD::TokenFactor, dl, MVT::Other, StLo, StHi);
+}
+
 static SDValue LowerExtended1BitVectorLoad(SDValue Op,
                                            const X86Subtarget &Subtarget,
                                            SelectionDAG &DAG) {
@@ -21444,6 +21504,7 @@ SDValue X86TargetLowering::LowerOperation(SDValue Op, SelectionDAG &DAG) const {
   case ISD::GC_TRANSITION_START:
                                 return LowerGC_TRANSITION_START(Op, DAG);
   case ISD::GC_TRANSITION_END:  return LowerGC_TRANSITION_END(Op, DAG);
+  case ISD::STORE:              return LowerTruncatingStore(Op, Subtarget, DAG);
   }
 }
 
@@ -28021,7 +28082,7 @@ static SDValue combineStore(SDNode *N, SelectionDAG &DAG,
     // vpmovqb, vpmovqw, vpmovqd, vpmovdb, vpmovdw
     // are designated for truncate store.
     // In this case we don't need any further transformations.
-    if (TLI.isTruncStoreLegal(VT, StVT))
+    if (TLI.isTruncStoreLegalOrCustom(VT, StVT))
       return SDValue();
 
     // From, To sizes and ElemCount must be pow of two
```

