summaryrefslogtreecommitdiffstats
path: root/llvm/lib/Target
diff options
context:
space:
mode:
authorKerry McLaughlin <kerry.mclaughlin@arm.com>2019-12-11 14:46:40 +0000
committerKerry McLaughlin <kerry.mclaughlin@arm.com>2019-12-13 10:08:20 +0000
commit4194ca8e5abff825a3daaa01ea2a6f69d7a652da (patch)
treedeb36170b27d05bd891a9a811833a418345096a2 /llvm/lib/Target
parent5c7cc6f83d1f3ea2016d94e1c9cc25f814d2671b (diff)
downloadbcm5719-llvm-4194ca8e5abff825a3daaa01ea2a6f69d7a652da.tar.gz
bcm5719-llvm-4194ca8e5abff825a3daaa01ea2a6f69d7a652da.zip
Recommit "[AArch64][SVE] Implement intrinsics for non-temporal loads & stores"
Updated pred_load patterns added to AArch64SVEInstrInfo.td by this patch to use reg + imm non-temporal loads to fix previous test failures. Original commit message: Adds the following intrinsics: - llvm.aarch64.sve.ldnt1 - llvm.aarch64.sve.stnt1 This patch creates masked loads and stores with the MONonTemporal flag set when used with the intrinsics above.
Diffstat (limited to 'llvm/lib/Target')
-rw-r--r--llvm/lib/Target/AArch64/AArch64ISelLowering.cpp66
-rw-r--r--llvm/lib/Target/AArch64/AArch64InstrInfo.td22
-rw-r--r--llvm/lib/Target/AArch64/AArch64SVEInstrInfo.td9
3 files changed, 95 insertions, 2 deletions
diff --git a/llvm/lib/Target/AArch64/AArch64ISelLowering.cpp b/llvm/lib/Target/AArch64/AArch64ISelLowering.cpp
index 5e55a670201..f601bf13eb5 100644
--- a/llvm/lib/Target/AArch64/AArch64ISelLowering.cpp
+++ b/llvm/lib/Target/AArch64/AArch64ISelLowering.cpp
@@ -8513,6 +8513,26 @@ bool AArch64TargetLowering::getTgtMemIntrinsic(IntrinsicInfo &Info,
Info.align = Align(16);
Info.flags = MachineMemOperand::MOStore | MachineMemOperand::MOVolatile;
return true;
+ case Intrinsic::aarch64_sve_ldnt1: {
+ PointerType *PtrTy = cast<PointerType>(I.getArgOperand(1)->getType());
+ Info.opc = ISD::INTRINSIC_W_CHAIN;
+ Info.memVT = MVT::getVT(PtrTy->getElementType());
+ Info.ptrVal = I.getArgOperand(1);
+ Info.offset = 0;
+ Info.align = MaybeAlign(DL.getABITypeAlignment(PtrTy->getElementType()));
+ Info.flags = MachineMemOperand::MOLoad | MachineMemOperand::MONonTemporal;
+ return true;
+ }
+ case Intrinsic::aarch64_sve_stnt1: {
+ PointerType *PtrTy = cast<PointerType>(I.getArgOperand(2)->getType());
+ Info.opc = ISD::INTRINSIC_W_CHAIN;
+ Info.memVT = MVT::getVT(PtrTy->getElementType());
+ Info.ptrVal = I.getArgOperand(2);
+ Info.offset = 0;
+ Info.align = MaybeAlign(DL.getABITypeAlignment(PtrTy->getElementType()));
+ Info.flags = MachineMemOperand::MOStore | MachineMemOperand::MONonTemporal;
+ return true;
+ }
default:
break;
}
@@ -10942,6 +10962,48 @@ static SDValue splitStoreSplat(SelectionDAG &DAG, StoreSDNode &St,
return NewST1;
}
+static SDValue performLDNT1Combine(SDNode *N, SelectionDAG &DAG) {
+ SDLoc DL(N);
+ EVT VT = N->getValueType(0);
+ EVT PtrTy = N->getOperand(3).getValueType();
+
+ EVT LoadVT = VT;
+ if (VT.isFloatingPoint())
+ LoadVT = VT.changeTypeToInteger();
+
+ auto *MINode = cast<MemIntrinsicSDNode>(N);
+ SDValue PassThru = DAG.getConstant(0, DL, LoadVT);
+ SDValue L = DAG.getMaskedLoad(LoadVT, DL, MINode->getChain(),
+ MINode->getOperand(3), DAG.getUNDEF(PtrTy),
+ MINode->getOperand(2), PassThru,
+ MINode->getMemoryVT(), MINode->getMemOperand(),
+ ISD::UNINDEXED, ISD::NON_EXTLOAD, false);
+
+ if (VT.isFloatingPoint()) {
+ SDValue Ops[] = { DAG.getNode(ISD::BITCAST, DL, VT, L), L.getValue(1) };
+ return DAG.getMergeValues(Ops, DL);
+ }
+
+ return L;
+}
+
+static SDValue performSTNT1Combine(SDNode *N, SelectionDAG &DAG) {
+ SDLoc DL(N);
+
+ SDValue Data = N->getOperand(2);
+ EVT DataVT = Data.getValueType();
+ EVT PtrTy = N->getOperand(4).getValueType();
+
+ if (DataVT.isFloatingPoint())
+ Data = DAG.getNode(ISD::BITCAST, DL, DataVT.changeTypeToInteger(), Data);
+
+ auto *MINode = cast<MemIntrinsicSDNode>(N);
+ return DAG.getMaskedStore(MINode->getChain(), DL, Data, MINode->getOperand(4),
+ DAG.getUNDEF(PtrTy), MINode->getOperand(3),
+ MINode->getMemoryVT(), MINode->getMemOperand(),
+ ISD::UNINDEXED, false, false);
+}
+
/// Replace a splat of zeros to a vector store by scalar stores of WZR/XZR. The
/// load store optimizer pass will merge them to store pair stores. This should
/// be better than a movi to create the vector zero followed by a vector store
@@ -12218,6 +12280,10 @@ SDValue AArch64TargetLowering::PerformDAGCombine(SDNode *N,
case Intrinsic::aarch64_neon_st3lane:
case Intrinsic::aarch64_neon_st4lane:
return performNEONPostLDSTCombine(N, DCI, DAG);
+ case Intrinsic::aarch64_sve_ldnt1:
+ return performLDNT1Combine(N, DAG);
+ case Intrinsic::aarch64_sve_stnt1:
+ return performSTNT1Combine(N, DAG);
case Intrinsic::aarch64_sve_ld1_gather:
return performLD1GatherCombine(N, DAG, AArch64ISD::GLD1);
case Intrinsic::aarch64_sve_ld1_gather_index:
diff --git a/llvm/lib/Target/AArch64/AArch64InstrInfo.td b/llvm/lib/Target/AArch64/AArch64InstrInfo.td
index 48872dc09cd..9eef93cb9ce 100644
--- a/llvm/lib/Target/AArch64/AArch64InstrInfo.td
+++ b/llvm/lib/Target/AArch64/AArch64InstrInfo.td
@@ -265,7 +265,8 @@ def nonext_masked_load :
PatFrag<(ops node:$ptr, node:$pred, node:$def),
(masked_ld node:$ptr, undef, node:$pred, node:$def), [{
return cast<MaskedLoadSDNode>(N)->getExtensionType() == ISD::NON_EXTLOAD &&
- cast<MaskedLoadSDNode>(N)->isUnindexed();
+ cast<MaskedLoadSDNode>(N)->isUnindexed() &&
+ !cast<MaskedLoadSDNode>(N)->isNonTemporal();
}]>;
// sign extending masked load fragments.
def asext_masked_load :
@@ -313,12 +314,21 @@ def zext_masked_load_i32 :
return cast<MaskedLoadSDNode>(N)->getMemoryVT().getScalarType() == MVT::i32;
}]>;
+def non_temporal_load :
+ PatFrag<(ops node:$ptr, node:$pred, node:$def),
+ (masked_ld node:$ptr, undef, node:$pred, node:$def), [{
+ return cast<MaskedLoadSDNode>(N)->getExtensionType() == ISD::NON_EXTLOAD &&
+ cast<MaskedLoadSDNode>(N)->isUnindexed() &&
+ cast<MaskedLoadSDNode>(N)->isNonTemporal();
+}]>;
+
// non-truncating masked store fragment.
def nontrunc_masked_store :
PatFrag<(ops node:$val, node:$ptr, node:$pred),
(masked_st node:$val, node:$ptr, undef, node:$pred), [{
return !cast<MaskedStoreSDNode>(N)->isTruncatingStore() &&
- cast<MaskedStoreSDNode>(N)->isUnindexed();
+ cast<MaskedStoreSDNode>(N)->isUnindexed() &&
+ !cast<MaskedStoreSDNode>(N)->isNonTemporal();
}]>;
// truncating masked store fragments.
def trunc_masked_store :
@@ -343,6 +353,14 @@ def trunc_masked_store_i32 :
return cast<MaskedStoreSDNode>(N)->getMemoryVT().getScalarType() == MVT::i32;
}]>;
+def non_temporal_store :
+ PatFrag<(ops node:$val, node:$ptr, node:$pred),
+ (masked_st node:$val, node:$ptr, undef, node:$pred), [{
+ return !cast<MaskedStoreSDNode>(N)->isTruncatingStore() &&
+ cast<MaskedStoreSDNode>(N)->isUnindexed() &&
+ cast<MaskedStoreSDNode>(N)->isNonTemporal();
+}]>;
+
// Node definitions.
def AArch64adrp : SDNode<"AArch64ISD::ADRP", SDTIntUnaryOp, []>;
def AArch64adr : SDNode<"AArch64ISD::ADR", SDTIntUnaryOp, []>;
diff --git a/llvm/lib/Target/AArch64/AArch64SVEInstrInfo.td b/llvm/lib/Target/AArch64/AArch64SVEInstrInfo.td
index cdd1b035849..8dfea2f451d 100644
--- a/llvm/lib/Target/AArch64/AArch64SVEInstrInfo.td
+++ b/llvm/lib/Target/AArch64/AArch64SVEInstrInfo.td
@@ -1179,6 +1179,15 @@ let Predicates = [HasSVE] in {
// 16-element contiguous stores
defm : pred_store<nxv16i8, nxv16i1, nontrunc_masked_store, ST1B_IMM>;
+ defm : pred_load<nxv16i8, nxv16i1, non_temporal_load, LDNT1B_ZRI>;
+ defm : pred_load<nxv8i16, nxv8i1, non_temporal_load, LDNT1H_ZRI>;
+ defm : pred_load<nxv4i32, nxv4i1, non_temporal_load, LDNT1W_ZRI>;
+ defm : pred_load<nxv2i64, nxv2i1, non_temporal_load, LDNT1D_ZRI>;
+
+ defm : pred_store<nxv16i8, nxv16i1, non_temporal_store, STNT1B_ZRI>;
+ defm : pred_store<nxv8i16, nxv8i1, non_temporal_store, STNT1H_ZRI>;
+ defm : pred_store<nxv4i32, nxv4i1, non_temporal_store, STNT1W_ZRI>;
+ defm : pred_store<nxv2i64, nxv2i1, non_temporal_store, STNT1D_ZRI>;
}
let Predicates = [HasSVE2] in {
OpenPOWER on IntegriCloud