| author | Kerry McLaughlin <kerry.mclaughlin@arm.com> | 2019-12-11 14:46:40 +0000 |
|---|---|---|
| committer | Kerry McLaughlin <kerry.mclaughlin@arm.com> | 2019-12-13 10:08:20 +0000 |
| commit | 4194ca8e5abff825a3daaa01ea2a6f69d7a652da (patch) | |
| tree | deb36170b27d05bd891a9a811833a418345096a2 /llvm/lib/Target | |
| parent | 5c7cc6f83d1f3ea2016d94e1c9cc25f814d2671b (diff) | |
| download | bcm5719-llvm-4194ca8e5abff825a3daaa01ea2a6f69d7a652da.tar.gz bcm5719-llvm-4194ca8e5abff825a3daaa01ea2a6f69d7a652da.zip | |
Recommit "[AArch64][SVE] Implement intrinsics for non-temporal loads & stores"
The pred_load patterns added to AArch64SVEInstrInfo.td by this patch have been updated to use reg + imm non-temporal loads, which fixes the previous test failures.
Original commit message:
Adds the following intrinsics:
- llvm.aarch64.sve.ldnt1
- llvm.aarch64.sve.stnt1
For the intrinsics above, this patch creates masked loads and stores with the
MONonTemporal flag set.
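
As a rough, hypothetical illustration (the IR below is not part of this diff, which is limited to llvm/lib/Target; the function name and the nxv4i32 element type are chosen only for the sketch), ldnt1 takes a predicate and a pointer, while stnt1 takes the data, the predicate, and the pointer:

```llvm
; Hypothetical usage sketch of the new non-temporal intrinsics.
declare <vscale x 4 x i32> @llvm.aarch64.sve.ldnt1.nxv4i32(<vscale x 4 x i1>, i32*)
declare void @llvm.aarch64.sve.stnt1.nxv4i32(<vscale x 4 x i32>, <vscale x 4 x i1>, i32*)

define void @copy_nt(<vscale x 4 x i1> %pg, i32* %src, i32* %dst) {
  ; Loads/stores only the lanes enabled by %pg, with non-temporal semantics.
  %v = call <vscale x 4 x i32> @llvm.aarch64.sve.ldnt1.nxv4i32(<vscale x 4 x i1> %pg, i32* %src)
  call void @llvm.aarch64.sve.stnt1.nxv4i32(<vscale x 4 x i32> %v, <vscale x 4 x i1> %pg, i32* %dst)
  ret void
}
```

On the SelectionDAG side these calls become masked loads and stores whose memory operands carry MONonTemporal, which is what the new PatFrags in the diff below key on.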
Diffstat (limited to 'llvm/lib/Target')
| Mode | Path | Lines changed |
|---|---|---|
| -rw-r--r-- | llvm/lib/Target/AArch64/AArch64ISelLowering.cpp | 66 |
| -rw-r--r-- | llvm/lib/Target/AArch64/AArch64InstrInfo.td | 22 |
| -rw-r--r-- | llvm/lib/Target/AArch64/AArch64SVEInstrInfo.td | 9 |
3 files changed, 95 insertions, 2 deletions
```diff
diff --git a/llvm/lib/Target/AArch64/AArch64ISelLowering.cpp b/llvm/lib/Target/AArch64/AArch64ISelLowering.cpp
index 5e55a670201..f601bf13eb5 100644
--- a/llvm/lib/Target/AArch64/AArch64ISelLowering.cpp
+++ b/llvm/lib/Target/AArch64/AArch64ISelLowering.cpp
@@ -8513,6 +8513,26 @@ bool AArch64TargetLowering::getTgtMemIntrinsic(IntrinsicInfo &Info,
     Info.align = Align(16);
     Info.flags = MachineMemOperand::MOStore | MachineMemOperand::MOVolatile;
     return true;
+  case Intrinsic::aarch64_sve_ldnt1: {
+    PointerType *PtrTy = cast<PointerType>(I.getArgOperand(1)->getType());
+    Info.opc = ISD::INTRINSIC_W_CHAIN;
+    Info.memVT = MVT::getVT(PtrTy->getElementType());
+    Info.ptrVal = I.getArgOperand(1);
+    Info.offset = 0;
+    Info.align = MaybeAlign(DL.getABITypeAlignment(PtrTy->getElementType()));
+    Info.flags = MachineMemOperand::MOLoad | MachineMemOperand::MONonTemporal;
+    return true;
+  }
+  case Intrinsic::aarch64_sve_stnt1: {
+    PointerType *PtrTy = cast<PointerType>(I.getArgOperand(2)->getType());
+    Info.opc = ISD::INTRINSIC_W_CHAIN;
+    Info.memVT = MVT::getVT(PtrTy->getElementType());
+    Info.ptrVal = I.getArgOperand(2);
+    Info.offset = 0;
+    Info.align = MaybeAlign(DL.getABITypeAlignment(PtrTy->getElementType()));
+    Info.flags = MachineMemOperand::MOStore | MachineMemOperand::MONonTemporal;
+    return true;
+  }
   default:
     break;
   }
@@ -10942,6 +10962,48 @@ static SDValue splitStoreSplat(SelectionDAG &DAG, StoreSDNode &St,
   return NewST1;
 }
 
+static SDValue performLDNT1Combine(SDNode *N, SelectionDAG &DAG) {
+  SDLoc DL(N);
+  EVT VT = N->getValueType(0);
+  EVT PtrTy = N->getOperand(3).getValueType();
+
+  EVT LoadVT = VT;
+  if (VT.isFloatingPoint())
+    LoadVT = VT.changeTypeToInteger();
+
+  auto *MINode = cast<MemIntrinsicSDNode>(N);
+  SDValue PassThru = DAG.getConstant(0, DL, LoadVT);
+  SDValue L = DAG.getMaskedLoad(LoadVT, DL, MINode->getChain(),
+                                MINode->getOperand(3), DAG.getUNDEF(PtrTy),
+                                MINode->getOperand(2), PassThru,
+                                MINode->getMemoryVT(), MINode->getMemOperand(),
+                                ISD::UNINDEXED, ISD::NON_EXTLOAD, false);
+
+  if (VT.isFloatingPoint()) {
+    SDValue Ops[] = { DAG.getNode(ISD::BITCAST, DL, VT, L), L.getValue(1) };
+    return DAG.getMergeValues(Ops, DL);
+  }
+
+  return L;
+}
+
+static SDValue performSTNT1Combine(SDNode *N, SelectionDAG &DAG) {
+  SDLoc DL(N);
+
+  SDValue Data = N->getOperand(2);
+  EVT DataVT = Data.getValueType();
+  EVT PtrTy = N->getOperand(4).getValueType();
+
+  if (DataVT.isFloatingPoint())
+    Data = DAG.getNode(ISD::BITCAST, DL, DataVT.changeTypeToInteger(), Data);
+
+  auto *MINode = cast<MemIntrinsicSDNode>(N);
+  return DAG.getMaskedStore(MINode->getChain(), DL, Data, MINode->getOperand(4),
+                            DAG.getUNDEF(PtrTy), MINode->getOperand(3),
+                            MINode->getMemoryVT(), MINode->getMemOperand(),
+                            ISD::UNINDEXED, false, false);
+}
+
 /// Replace a splat of zeros to a vector store by scalar stores of WZR/XZR. The
 /// load store optimizer pass will merge them to store pair stores. This should
 /// be better than a movi to create the vector zero followed by a vector store
@@ -12218,6 +12280,10 @@ SDValue AArch64TargetLowering::PerformDAGCombine(SDNode *N,
   case Intrinsic::aarch64_neon_st3lane:
   case Intrinsic::aarch64_neon_st4lane:
     return performNEONPostLDSTCombine(N, DCI, DAG);
+  case Intrinsic::aarch64_sve_ldnt1:
+    return performLDNT1Combine(N, DAG);
+  case Intrinsic::aarch64_sve_stnt1:
+    return performSTNT1Combine(N, DAG);
   case Intrinsic::aarch64_sve_ld1_gather:
     return performLD1GatherCombine(N, DAG, AArch64ISD::GLD1);
   case Intrinsic::aarch64_sve_ld1_gather_index:
diff --git a/llvm/lib/Target/AArch64/AArch64InstrInfo.td b/llvm/lib/Target/AArch64/AArch64InstrInfo.td
index 48872dc09cd..9eef93cb9ce 100644
--- a/llvm/lib/Target/AArch64/AArch64InstrInfo.td
+++ b/llvm/lib/Target/AArch64/AArch64InstrInfo.td
@@ -265,7 +265,8 @@ def nonext_masked_load :
   PatFrag<(ops node:$ptr, node:$pred, node:$def),
           (masked_ld node:$ptr, undef, node:$pred, node:$def), [{
   return cast<MaskedLoadSDNode>(N)->getExtensionType() == ISD::NON_EXTLOAD &&
-         cast<MaskedLoadSDNode>(N)->isUnindexed();
+         cast<MaskedLoadSDNode>(N)->isUnindexed() &&
+         !cast<MaskedLoadSDNode>(N)->isNonTemporal();
 }]>;
 // sign extending masked load fragments.
 def asext_masked_load :
@@ -313,12 +314,21 @@ def zext_masked_load_i32 :
   return cast<MaskedLoadSDNode>(N)->getMemoryVT().getScalarType() == MVT::i32;
 }]>;
 
+def non_temporal_load :
+  PatFrag<(ops node:$ptr, node:$pred, node:$def),
+          (masked_ld node:$ptr, undef, node:$pred, node:$def), [{
+  return cast<MaskedLoadSDNode>(N)->getExtensionType() == ISD::NON_EXTLOAD &&
+         cast<MaskedLoadSDNode>(N)->isUnindexed() &&
+         cast<MaskedLoadSDNode>(N)->isNonTemporal();
+}]>;
+
 // non-truncating masked store fragment.
 def nontrunc_masked_store :
   PatFrag<(ops node:$val, node:$ptr, node:$pred),
           (masked_st node:$val, node:$ptr, undef, node:$pred), [{
   return !cast<MaskedStoreSDNode>(N)->isTruncatingStore() &&
-         cast<MaskedStoreSDNode>(N)->isUnindexed();
+         cast<MaskedStoreSDNode>(N)->isUnindexed() &&
+         !cast<MaskedStoreSDNode>(N)->isNonTemporal();
 }]>;
 // truncating masked store fragments.
 def trunc_masked_store :
@@ -343,6 +353,14 @@ def trunc_masked_store_i32 :
   return cast<MaskedStoreSDNode>(N)->getMemoryVT().getScalarType() == MVT::i32;
 }]>;
 
+def non_temporal_store :
+  PatFrag<(ops node:$val, node:$ptr, node:$pred),
+          (masked_st node:$val, node:$ptr, undef, node:$pred), [{
+  return !cast<MaskedStoreSDNode>(N)->isTruncatingStore() &&
+         cast<MaskedStoreSDNode>(N)->isUnindexed() &&
+         cast<MaskedStoreSDNode>(N)->isNonTemporal();
+}]>;
+
 // Node definitions.
 def AArch64adrp : SDNode<"AArch64ISD::ADRP", SDTIntUnaryOp, []>;
 def AArch64adr : SDNode<"AArch64ISD::ADR", SDTIntUnaryOp, []>;
diff --git a/llvm/lib/Target/AArch64/AArch64SVEInstrInfo.td b/llvm/lib/Target/AArch64/AArch64SVEInstrInfo.td
index cdd1b035849..8dfea2f451d 100644
--- a/llvm/lib/Target/AArch64/AArch64SVEInstrInfo.td
+++ b/llvm/lib/Target/AArch64/AArch64SVEInstrInfo.td
@@ -1179,6 +1179,15 @@ let Predicates = [HasSVE] in {
 
   // 16-element contiguous stores
   defm : pred_store<nxv16i8, nxv16i1, nontrunc_masked_store, ST1B_IMM>;
+  defm : pred_load<nxv16i8, nxv16i1, non_temporal_load, LDNT1B_ZRI>;
+  defm : pred_load<nxv8i16, nxv8i1,  non_temporal_load, LDNT1H_ZRI>;
+  defm : pred_load<nxv4i32, nxv4i1,  non_temporal_load, LDNT1W_ZRI>;
+  defm : pred_load<nxv2i64, nxv2i1,  non_temporal_load, LDNT1D_ZRI>;
+
+  defm : pred_store<nxv16i8, nxv16i1, non_temporal_store, STNT1B_ZRI>;
+  defm : pred_store<nxv8i16, nxv8i1,  non_temporal_store, STNT1H_ZRI>;
+  defm : pred_store<nxv4i32, nxv4i1,  non_temporal_store, STNT1W_ZRI>;
+  defm : pred_store<nxv2i64, nxv2i1,  non_temporal_store, STNT1D_ZRI>;
 }
 
 let Predicates = [HasSVE2] in {
```
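
A second hypothetical sketch (again not taken from this patch; the function name and nxv2f64 type are illustrative): floating-point element types go through the same lowering, since performLDNT1Combine performs the masked load in the equivalent integer type with a zero passthru and bitcasts the result back to the FP type.

```llvm
; Hypothetical FP example: lowered as a non-temporal masked load of
; <vscale x 2 x i64> followed by a bitcast back to <vscale x 2 x double>.
declare <vscale x 2 x double> @llvm.aarch64.sve.ldnt1.nxv2f64(<vscale x 2 x i1>, double*)

define <vscale x 2 x double> @ldnt1_f64(<vscale x 2 x i1> %pg, double* %base) {
  %v = call <vscale x 2 x double> @llvm.aarch64.sve.ldnt1.nxv2f64(<vscale x 2 x i1> %pg, double* %base)
  ret <vscale x 2 x double> %v
}
```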

