diff options
| author | Elena Demikhovsky <elena.demikhovsky@intel.com> | 2016-11-03 03:23:55 +0000 |
|---|---|---|
| committer | Elena Demikhovsky <elena.demikhovsky@intel.com> | 2016-11-03 03:23:55 +0000 |
| commit | caaceef4b381f0ebeda53889004662c802ce2cab (patch) | |
| tree | 87b26f0d305b68ba13b26290f2e3817d4486b004 /llvm/lib/Target | |
| parent | 7c7abafd81df5a67c2cd30009b39abc692389504 (diff) | |
| download | bcm5719-llvm-caaceef4b381f0ebeda53889004662c802ce2cab.tar.gz bcm5719-llvm-caaceef4b381f0ebeda53889004662c802ce2cab.zip | |
Expandload and Compressstore intrinsics
Two new intrinsics covering AVX-512 compress/expand functionality.
This implementation includes syntax, DAG builder, operation lowering and tests.
Does not include: handling of illegal data types, the codegen-prepare pass, and the cost model.
llvm-svn: 285876
Diffstat (limited to 'llvm/lib/Target')
| -rw-r--r-- | llvm/lib/Target/X86/X86ISelLowering.cpp | 74 | ||||
| -rw-r--r-- | llvm/lib/Target/X86/X86InstrFragmentsSIMD.td | 17 |
2 files changed, 69 insertions, 22 deletions
diff --git a/llvm/lib/Target/X86/X86ISelLowering.cpp b/llvm/lib/Target/X86/X86ISelLowering.cpp index 243bd825132..995e5e6f9d1 100644 --- a/llvm/lib/Target/X86/X86ISelLowering.cpp +++ b/llvm/lib/Target/X86/X86ISelLowering.cpp @@ -1232,10 +1232,11 @@ X86TargetLowering::X86TargetLowering(const X86TargetMachine &TM, setTruncStoreAction(MVT::v4i32, MVT::v4i8, Legal); setTruncStoreAction(MVT::v4i32, MVT::v4i16, Legal); } else { - setOperationAction(ISD::MLOAD, MVT::v8i32, Custom); - setOperationAction(ISD::MLOAD, MVT::v8f32, Custom); - setOperationAction(ISD::MSTORE, MVT::v8i32, Custom); - setOperationAction(ISD::MSTORE, MVT::v8f32, Custom); + for (auto VT : {MVT::v4i32, MVT::v8i32, MVT::v2i64, MVT::v4i64, + MVT::v4f32, MVT::v8f32, MVT::v2f64, MVT::v4f64}) { + setOperationAction(ISD::MLOAD, VT, Custom); + setOperationAction(ISD::MSTORE, VT, Custom); + } } setOperationAction(ISD::TRUNCATE, MVT::i1, Custom); setOperationAction(ISD::TRUNCATE, MVT::v16i8, Custom); @@ -21940,26 +21941,48 @@ static SDValue LowerMLOAD(SDValue Op, const X86Subtarget &Subtarget, SDValue Mask = N->getMask(); SDLoc dl(Op); + assert((!N->isExpandingLoad() || Subtarget.hasAVX512()) && + "Expanding masked load is supported on AVX-512 target only!"); + + assert((!N->isExpandingLoad() || ScalarVT.getSizeInBits() >= 32) && + "Expanding masked load is supported for 32 and 64-bit types only!"); + + // 4x32, 4x64 and 2x64 vectors of non-expanding loads are legal regardless of + // VLX. These types for exp-loads are handled here. 
+ if (!N->isExpandingLoad() && VT.getVectorNumElements() <= 4) + return Op; + assert(Subtarget.hasAVX512() && !Subtarget.hasVLX() && !VT.is512BitVector() && "Cannot lower masked load op."); - assert(((ScalarVT == MVT::i32 || ScalarVT == MVT::f32) || + assert((ScalarVT.getSizeInBits() >= 32 || (Subtarget.hasBWI() && (ScalarVT == MVT::i8 || ScalarVT == MVT::i16))) && "Unsupported masked load op."); // This operation is legal for targets with VLX, but without // VLX the vector should be widened to 512 bit - unsigned NumEltsInWideVec = 512/VT.getScalarSizeInBits(); + unsigned NumEltsInWideVec = 512 / VT.getScalarSizeInBits(); MVT WideDataVT = MVT::getVectorVT(ScalarVT, NumEltsInWideVec); - MVT WideMaskVT = MVT::getVectorVT(MVT::i1, NumEltsInWideVec); SDValue Src0 = N->getSrc0(); Src0 = ExtendToType(Src0, WideDataVT, DAG); + + // Mask element has to be i1 + MVT MaskEltTy = Mask.getSimpleValueType().getScalarType(); + assert((MaskEltTy == MVT::i1 || VT.getVectorNumElements() <= 4) && + "We handle 4x32, 4x64 and 2x64 vectors only in this casse"); + + MVT WideMaskVT = MVT::getVectorVT(MaskEltTy, NumEltsInWideVec); + Mask = ExtendToType(Mask, WideMaskVT, DAG, true); + if (MaskEltTy != MVT::i1) + Mask = DAG.getNode(ISD::TRUNCATE, dl, + MVT::getVectorVT(MVT::i1, NumEltsInWideVec), Mask); SDValue NewLoad = DAG.getMaskedLoad(WideDataVT, dl, N->getChain(), N->getBasePtr(), Mask, Src0, N->getMemoryVT(), N->getMemOperand(), - N->getExtensionType()); + N->getExtensionType(), + N->isExpandingLoad()); SDValue Exract = DAG.getNode(ISD::EXTRACT_SUBVECTOR, dl, VT, NewLoad.getValue(0), @@ -21977,10 +22000,20 @@ static SDValue LowerMSTORE(SDValue Op, const X86Subtarget &Subtarget, SDValue Mask = N->getMask(); SDLoc dl(Op); + assert((!N->isCompressingStore() || Subtarget.hasAVX512()) && + "Expanding masked load is supported on AVX-512 target only!"); + + assert((!N->isCompressingStore() || ScalarVT.getSizeInBits() >= 32) && + "Expanding masked load is supported for 32 and 64-bit types 
only!"); + + // 4x32 and 2x64 vectors of non-compressing stores are legal regardless to VLX. + if (!N->isCompressingStore() && VT.getVectorNumElements() <= 4) + return Op; + assert(Subtarget.hasAVX512() && !Subtarget.hasVLX() && !VT.is512BitVector() && "Cannot lower masked store op."); - assert(((ScalarVT == MVT::i32 || ScalarVT == MVT::f32) || + assert((ScalarVT.getSizeInBits() >= 32 || (Subtarget.hasBWI() && (ScalarVT == MVT::i8 || ScalarVT == MVT::i16))) && "Unsupported masked store op."); @@ -21989,12 +22022,22 @@ static SDValue LowerMSTORE(SDValue Op, const X86Subtarget &Subtarget, // VLX the vector should be widened to 512 bit unsigned NumEltsInWideVec = 512/VT.getScalarSizeInBits(); MVT WideDataVT = MVT::getVectorVT(ScalarVT, NumEltsInWideVec); - MVT WideMaskVT = MVT::getVectorVT(MVT::i1, NumEltsInWideVec); + + // Mask element has to be i1 + MVT MaskEltTy = Mask.getSimpleValueType().getScalarType(); + assert((MaskEltTy == MVT::i1 || VT.getVectorNumElements() <= 4) && + "We handle 4x32, 4x64 and 2x64 vectors only in this casse"); + + MVT WideMaskVT = MVT::getVectorVT(MaskEltTy, NumEltsInWideVec); + DataToStore = ExtendToType(DataToStore, WideDataVT, DAG); Mask = ExtendToType(Mask, WideMaskVT, DAG, true); + if (MaskEltTy != MVT::i1) + Mask = DAG.getNode(ISD::TRUNCATE, dl, + MVT::getVectorVT(MVT::i1, NumEltsInWideVec), Mask); return DAG.getMaskedStore(N->getChain(), dl, DataToStore, N->getBasePtr(), Mask, N->getMemoryVT(), N->getMemOperand(), - N->isTruncatingStore()); + N->isTruncatingStore(), N->isCompressingStore()); } static SDValue LowerMGATHER(SDValue Op, const X86Subtarget &Subtarget, @@ -29881,6 +29924,11 @@ static SDValue combineMaskedLoad(SDNode *N, SelectionDAG &DAG, TargetLowering::DAGCombinerInfo &DCI, const X86Subtarget &Subtarget) { MaskedLoadSDNode *Mld = cast<MaskedLoadSDNode>(N); + + // TODO: Expanding load with constant mask may be optimized as well. 
+ if (Mld->isExpandingLoad()) + return SDValue(); + if (Mld->getExtensionType() == ISD::NON_EXTLOAD) { if (SDValue ScalarLoad = reduceMaskedLoadToScalarLoad(Mld, DAG, DCI)) return ScalarLoad; @@ -29996,6 +30044,10 @@ static SDValue reduceMaskedStoreToScalarStore(MaskedStoreSDNode *MS, static SDValue combineMaskedStore(SDNode *N, SelectionDAG &DAG, const X86Subtarget &Subtarget) { MaskedStoreSDNode *Mst = cast<MaskedStoreSDNode>(N); + + if (Mst->isCompressingStore()) + return SDValue(); + if (!Mst->isTruncatingStore()) return reduceMaskedStoreToScalarStore(Mst, DAG); diff --git a/llvm/lib/Target/X86/X86InstrFragmentsSIMD.td b/llvm/lib/Target/X86/X86InstrFragmentsSIMD.td index 594af33c7fc..377d8b61cde 100644 --- a/llvm/lib/Target/X86/X86InstrFragmentsSIMD.td +++ b/llvm/lib/Target/X86/X86InstrFragmentsSIMD.td @@ -965,28 +965,23 @@ def X86mstore : PatFrag<(ops node:$src1, node:$src2, node:$src3), def masked_store_aligned128 : PatFrag<(ops node:$src1, node:$src2, node:$src3), (X86mstore node:$src1, node:$src2, node:$src3), [{ - if (auto *Store = dyn_cast<MaskedStoreSDNode>(N)) - return Store->getAlignment() >= 16; - return false; + return cast<MaskedStoreSDNode>(N)->getAlignment() >= 16; }]>; def masked_store_aligned256 : PatFrag<(ops node:$src1, node:$src2, node:$src3), (X86mstore node:$src1, node:$src2, node:$src3), [{ - if (auto *Store = dyn_cast<MaskedStoreSDNode>(N)) - return Store->getAlignment() >= 32; - return false; + return cast<MaskedStoreSDNode>(N)->getAlignment() >= 32; }]>; def masked_store_aligned512 : PatFrag<(ops node:$src1, node:$src2, node:$src3), (X86mstore node:$src1, node:$src2, node:$src3), [{ - if (auto *Store = dyn_cast<MaskedStoreSDNode>(N)) - return Store->getAlignment() >= 64; - return false; + return cast<MaskedStoreSDNode>(N)->getAlignment() >= 64; }]>; def masked_store_unaligned : PatFrag<(ops node:$src1, node:$src2, node:$src3), - (X86mstore node:$src1, node:$src2, node:$src3), [{ - return isa<MaskedStoreSDNode>(N); + (masked_store 
node:$src1, node:$src2, node:$src3), [{ + return (!cast<MaskedStoreSDNode>(N)->isTruncatingStore()) && + (!cast<MaskedStoreSDNode>(N)->isCompressingStore()); }]>; def X86mCompressingStore : PatFrag<(ops node:$src1, node:$src2, node:$src3), |

