author     Victor Campos <Victor.Campos@arm.com>  2019-10-28 13:44:48 +0000
committer  Victor Campos <Victor.Campos@arm.com>  2019-12-18 10:03:12 +0000
commit     364b8f5fbe0ac496931dcbd6f0493781f0677e82 (patch)
tree       da7fa9bd8a82a6d3fe8c12b2bafee629901757e8 /llvm/lib/Target/AArch64
parent     97ca7c2cc9083ebde681b0e11f7a8ccae1966d64 (diff)
[AArch64] Improve codegen of volatile load/store of i128
Summary:
Instead of generating two i64 instructions for each load or store of a volatile
i128 value (two LDRs or STRs), now emit a single LDP or STP.

Reviewers: labrinea, t.p.northover, efriedma

Reviewed By: efriedma

Subscribers: kristof.beyls, hiraditya, llvm-commits

Tags: #llvm

Differential Revision: https://reviews.llvm.org/D69559
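For illustration only (this snippet is not part of the commit): a minimal C++ example, assuming Clang/GCC-style __int128 support on AArch64, whose volatile accesses are the kind of operations this patch targets. The expected instruction selection in the comments follows the summary above; the exact registers and addressing mode are not guaranteed.

// Hypothetical example: volatile 128-bit accesses on AArch64.
// Before this change each access lowered to two 64-bit LDRs/STRs; with it the
// backend can emit a single LDP/STP, per the commit summary.
volatile __int128 g;        // assumes __int128, as provided by Clang/GCC on AArch64

__int128 load_it() {
  return g;                 // expected: one ldp (previously: two ldr)
}

void store_it(__int128 v) {
  g = v;                    // expected: one stp (previously: two str)
}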
Diffstat (limited to 'llvm/lib/Target/AArch64')
-rw-r--r--  llvm/lib/Target/AArch64/AArch64ISelLowering.cpp  67
-rw-r--r--  llvm/lib/Target/AArch64/AArch64ISelLowering.h     4
-rw-r--r--  llvm/lib/Target/AArch64/AArch64InstrInfo.td       12
3 files changed, 69 insertions, 14 deletions
diff --git a/llvm/lib/Target/AArch64/AArch64ISelLowering.cpp b/llvm/lib/Target/AArch64/AArch64ISelLowering.cpp
index 6ea1e603f9e..87a320dfd3a 100644
--- a/llvm/lib/Target/AArch64/AArch64ISelLowering.cpp
+++ b/llvm/lib/Target/AArch64/AArch64ISelLowering.cpp
@@ -516,6 +516,10 @@ AArch64TargetLowering::AArch64TargetLowering(const TargetMachine &TM,
setOperationAction(ISD::ATOMIC_LOAD_AND, MVT::i32, Custom);
setOperationAction(ISD::ATOMIC_LOAD_AND, MVT::i64, Custom);
+ // 128-bit loads and stores can be done without expanding
+ setOperationAction(ISD::LOAD, MVT::i128, Custom);
+ setOperationAction(ISD::STORE, MVT::i128, Custom);
+
// Lower READCYCLECOUNTER using an mrs from PMCCNTR_EL0.
// This requires the Performance Monitors extension.
if (Subtarget->hasPerfMon())
@@ -1364,6 +1368,8 @@ const char *AArch64TargetLowering::getTargetNodeName(unsigned Opcode) const {
case AArch64ISD::SST1_SXTW_SCALED: return "AArch64ISD::SST1_SXTW_SCALED";
case AArch64ISD::SST1_UXTW_SCALED: return "AArch64ISD::SST1_UXTW_SCALED";
case AArch64ISD::SST1_IMM: return "AArch64ISD::SST1_IMM";
+ case AArch64ISD::LDP: return "AArch64ISD::LDP";
+ case AArch64ISD::STP: return "AArch64ISD::STP";
}
return nullptr;
}
@@ -2988,7 +2994,7 @@ static SDValue LowerTruncateVectorStore(SDLoc DL, StoreSDNode *ST,
// Custom lowering for any store, vector or scalar and/or default or with
// a truncate operations. Currently only custom lower truncate operation
-// from vector v4i16 to v4i8.
+// from vector v4i16 to v4i8 or volatile stores of i128.
SDValue AArch64TargetLowering::LowerSTORE(SDValue Op,
SelectionDAG &DAG) const {
SDLoc Dl(Op);
@@ -3000,18 +3006,32 @@ SDValue AArch64TargetLowering::LowerSTORE(SDValue Op,
EVT VT = Value.getValueType();
EVT MemVT = StoreNode->getMemoryVT();
- assert (VT.isVector() && "Can only custom lower vector store types");
-
- unsigned AS = StoreNode->getAddressSpace();
- unsigned Align = StoreNode->getAlignment();
- if (Align < MemVT.getStoreSize() &&
- !allowsMisalignedMemoryAccesses(
- MemVT, AS, Align, StoreNode->getMemOperand()->getFlags(), nullptr)) {
- return scalarizeVectorStore(StoreNode, DAG);
- }
-
- if (StoreNode->isTruncatingStore()) {
- return LowerTruncateVectorStore(Dl, StoreNode, VT, MemVT, DAG);
+ if (VT.isVector()) {
+ unsigned AS = StoreNode->getAddressSpace();
+ unsigned Align = StoreNode->getAlignment();
+ if (Align < MemVT.getStoreSize() &&
+ !allowsMisalignedMemoryAccesses(MemVT, AS, Align,
+ StoreNode->getMemOperand()->getFlags(),
+ nullptr)) {
+ return scalarizeVectorStore(StoreNode, DAG);
+ }
+
+ if (StoreNode->isTruncatingStore()) {
+ return LowerTruncateVectorStore(Dl, StoreNode, VT, MemVT, DAG);
+ }
+ } else if (MemVT == MVT::i128 && StoreNode->isVolatile()) {
+ assert(StoreNode->getValue()->getValueType(0) == MVT::i128);
+ SDValue Lo =
+ DAG.getNode(ISD::EXTRACT_ELEMENT, Dl, MVT::i64, StoreNode->getValue(),
+ DAG.getConstant(0, Dl, MVT::i64));
+ SDValue Hi =
+ DAG.getNode(ISD::EXTRACT_ELEMENT, Dl, MVT::i64, StoreNode->getValue(),
+ DAG.getConstant(1, Dl, MVT::i64));
+ SDValue Result = DAG.getMemIntrinsicNode(
+ AArch64ISD::STP, Dl, DAG.getVTList(MVT::Other),
+ {StoreNode->getChain(), Lo, Hi, StoreNode->getBasePtr()},
+ StoreNode->getMemoryVT(), StoreNode->getMemOperand());
+ return Result;
}
return SDValue();
@@ -12689,6 +12709,27 @@ void AArch64TargetLowering::ReplaceNodeResults(
case ISD::ATOMIC_CMP_SWAP:
ReplaceCMP_SWAP_128Results(N, Results, DAG, Subtarget);
return;
+ case ISD::LOAD: {
+ assert(SDValue(N, 0).getValueType() == MVT::i128 &&
+ "unexpected load's value type");
+ LoadSDNode *LoadNode = cast<LoadSDNode>(N);
+ if (!LoadNode->isVolatile() || LoadNode->getMemoryVT() != MVT::i128) {
+ // Non-volatile loads are optimized later in AArch64's load/store
+ // optimizer.
+ return;
+ }
+
+ SDValue Result = DAG.getMemIntrinsicNode(
+ AArch64ISD::LDP, SDLoc(N),
+ DAG.getVTList({MVT::i64, MVT::i64, MVT::Other}),
+ {LoadNode->getChain(), LoadNode->getBasePtr()}, LoadNode->getMemoryVT(),
+ LoadNode->getMemOperand());
+
+ SDValue Pair = DAG.getNode(ISD::BUILD_PAIR, SDLoc(N), MVT::i128,
+ Result.getValue(0), Result.getValue(1));
+ Results.append({Pair, Result.getValue(2) /* Chain */});
+ return;
+ }
}
}
diff --git a/llvm/lib/Target/AArch64/AArch64ISelLowering.h b/llvm/lib/Target/AArch64/AArch64ISelLowering.h
index 449c0d376b7..fee8d21c2a6 100644
--- a/llvm/lib/Target/AArch64/AArch64ISelLowering.h
+++ b/llvm/lib/Target/AArch64/AArch64ISelLowering.h
@@ -260,8 +260,10 @@ enum NodeType : unsigned {
STG,
STZG,
ST2G,
- STZ2G
+ STZ2G,
+ LDP,
+ STP
};
} // end namespace AArch64ISD
diff --git a/llvm/lib/Target/AArch64/AArch64InstrInfo.td b/llvm/lib/Target/AArch64/AArch64InstrInfo.td
index 9eef93cb9ce..8dc61e517cf 100644
--- a/llvm/lib/Target/AArch64/AArch64InstrInfo.td
+++ b/llvm/lib/Target/AArch64/AArch64InstrInfo.td
@@ -243,6 +243,9 @@ def SDT_AArch64ITOF : SDTypeProfile<1, 1, [SDTCisFP<0>, SDTCisSameAs<0,1>]>;
def SDT_AArch64TLSDescCall : SDTypeProfile<0, -2, [SDTCisPtrTy<0>,
SDTCisPtrTy<1>]>;
+def SDT_AArch64ldp : SDTypeProfile<2, 1, [SDTCisVT<0, i64>, SDTCisSameAs<0, 1>, SDTCisPtrTy<2>]>;
+def SDT_AArch64stp : SDTypeProfile<0, 3, [SDTCisVT<0, i64>, SDTCisSameAs<0, 1>, SDTCisPtrTy<2>]>;
+
// Generates the general dynamic sequences, i.e.
// adrp x0, :tlsdesc:var
// ldr x1, [x0, #:tlsdesc_lo12:var]
@@ -535,6 +538,9 @@ def AArch64sunpklo : SDNode<"AArch64ISD::SUNPKLO", SDT_AArch64unpk>;
def AArch64uunpkhi : SDNode<"AArch64ISD::UUNPKHI", SDT_AArch64unpk>;
def AArch64uunpklo : SDNode<"AArch64ISD::UUNPKLO", SDT_AArch64unpk>;
+def AArch64ldp : SDNode<"AArch64ISD::LDP", SDT_AArch64ldp, [SDNPHasChain, SDNPMayLoad, SDNPMemOperand]>;
+def AArch64stp : SDNode<"AArch64ISD::STP", SDT_AArch64stp, [SDNPHasChain, SDNPMayStore, SDNPMemOperand]>;
+
//===----------------------------------------------------------------------===//
//===----------------------------------------------------------------------===//
@@ -1987,6 +1993,9 @@ defm LDNPS : LoadPairNoAlloc<0b00, 1, FPR32Op, simm7s4, "ldnp">;
defm LDNPD : LoadPairNoAlloc<0b01, 1, FPR64Op, simm7s8, "ldnp">;
defm LDNPQ : LoadPairNoAlloc<0b10, 1, FPR128Op, simm7s16, "ldnp">;
+def : Pat<(AArch64ldp (am_indexed7s64 GPR64sp:$Rn, simm7s8:$offset)),
+ (LDPXi GPR64sp:$Rn, simm7s8:$offset)>;
+
//---
// (register offset)
//---
@@ -2680,6 +2689,9 @@ defm STNPS : StorePairNoAlloc<0b00, 1, FPR32Op, simm7s4, "stnp">;
defm STNPD : StorePairNoAlloc<0b01, 1, FPR64Op, simm7s8, "stnp">;
defm STNPQ : StorePairNoAlloc<0b10, 1, FPR128Op, simm7s16, "stnp">;
+def : Pat<(AArch64stp GPR64z:$Rt, GPR64z:$Rt2, (am_indexed7s64 GPR64sp:$Rn, simm7s8:$offset)),
+ (STPXi GPR64z:$Rt, GPR64z:$Rt2, GPR64sp:$Rn, simm7s8:$offset)>;
+
//---
// (Register offset)