diff options
| author | Ahmed Bougacha <ahmed.bougacha@gmail.com> | 2015-09-10 01:42:28 +0000 |
|---|---|---|
| committer | Ahmed Bougacha <ahmed.bougacha@gmail.com> | 2015-09-10 01:42:28 +0000 |
| commit | b8886b517d417f14951e8d0dc6f00aef85638c34 (patch) | |
| tree | ab61734f33f7c9fdf409e9b3ba8bbf6881b3e3db /llvm/lib | |
| parent | 80f766a032fca529ddcb78d952ee882536223b3b (diff) | |
| download | bcm5719-llvm-b8886b517d417f14951e8d0dc6f00aef85638c34.tar.gz bcm5719-llvm-b8886b517d417f14951e8d0dc6f00aef85638c34.zip | |
[AArch64] Support selecting STNP.
We could go through the load/store optimizer and match STNP where
we would have matched a nontemporal-annotated STP, but that's not
reliable enough, as an opportunistic optimization.
Instead, we can guarantee emitting STNP, by matching them at ISel.
Since there are no single-input nontemporal stores, we have to
resort to some high-bits-extracting trickery to generate an STNP
from a plain store.
Also, we need to support another, LDP/STP-specific addressing mode,
base + signed scaled 7-bit immediate offset.
For now, only match the base. Let's make it smart separately.
Part of PR24086.
llvm-svn: 247231
Diffstat (limited to 'llvm/lib')
| -rw-r--r-- | llvm/lib/Target/AArch64/AArch64ISelDAGToDAG.cpp | 33 | ||||
| -rw-r--r-- | llvm/lib/Target/AArch64/AArch64InstrFormats.td | 6 | ||||
| -rw-r--r-- | llvm/lib/Target/AArch64/AArch64InstrInfo.td | 39 |
3 files changed, 78 insertions, 0 deletions
diff --git a/llvm/lib/Target/AArch64/AArch64ISelDAGToDAG.cpp b/llvm/lib/Target/AArch64/AArch64ISelDAGToDAG.cpp index d93e59ccf77..77896af196d 100644 --- a/llvm/lib/Target/AArch64/AArch64ISelDAGToDAG.cpp +++ b/llvm/lib/Target/AArch64/AArch64ISelDAGToDAG.cpp @@ -77,6 +77,21 @@ public: bool SelectLogicalShiftedRegister(SDValue N, SDValue &Reg, SDValue &Shift) { return SelectShiftedRegister(N, true, Reg, Shift); } + bool SelectAddrModeIndexed7S8(SDValue N, SDValue &Base, SDValue &OffImm) { + return SelectAddrModeIndexed7S(N, 1, Base, OffImm); + } + bool SelectAddrModeIndexed7S16(SDValue N, SDValue &Base, SDValue &OffImm) { + return SelectAddrModeIndexed7S(N, 2, Base, OffImm); + } + bool SelectAddrModeIndexed7S32(SDValue N, SDValue &Base, SDValue &OffImm) { + return SelectAddrModeIndexed7S(N, 4, Base, OffImm); + } + bool SelectAddrModeIndexed7S64(SDValue N, SDValue &Base, SDValue &OffImm) { + return SelectAddrModeIndexed7S(N, 8, Base, OffImm); + } + bool SelectAddrModeIndexed7S128(SDValue N, SDValue &Base, SDValue &OffImm) { + return SelectAddrModeIndexed7S(N, 16, Base, OffImm); + } bool SelectAddrModeIndexed8(SDValue N, SDValue &Base, SDValue &OffImm) { return SelectAddrModeIndexed(N, 1, Base, OffImm); } @@ -164,6 +179,8 @@ public: private: bool SelectShiftedRegister(SDValue N, bool AllowROR, SDValue &Reg, SDValue &Shift); + bool SelectAddrModeIndexed7S(SDValue N, unsigned Size, SDValue &Base, + SDValue &OffImm); bool SelectAddrModeIndexed(SDValue N, unsigned Size, SDValue &Base, SDValue &OffImm); bool SelectAddrModeUnscaled(SDValue N, unsigned Size, SDValue &Base, @@ -606,6 +623,22 @@ static bool isWorthFoldingADDlow(SDValue N) { return true; } +/// SelectAddrModeIndexed7S - Select a "register plus scaled signed 7-bit +/// immediate" address. The "Size" argument is the size in bytes of the memory +/// reference, which determines the scale. 
+bool AArch64DAGToDAGISel::SelectAddrModeIndexed7S(SDValue N, unsigned Size, + SDValue &Base, + SDValue &OffImm) { + SDLoc dl(N); + // Base only. The address will be materialized into a register before + // the memory is accessed. + // add x0, Xbase, #offset + // stp x1, x2, [x0] + Base = N; + OffImm = CurDAG->getTargetConstant(0, dl, MVT::i64); + return true; +} + /// SelectAddrModeIndexed - Select a "register plus scaled unsigned 12-bit /// immediate" address. The "Size" argument is the size in bytes of the memory /// reference, which determines the scale. diff --git a/llvm/lib/Target/AArch64/AArch64InstrFormats.td b/llvm/lib/Target/AArch64/AArch64InstrFormats.td index 0903f320601..d644f264eb9 100644 --- a/llvm/lib/Target/AArch64/AArch64InstrFormats.td +++ b/llvm/lib/Target/AArch64/AArch64InstrFormats.td @@ -248,6 +248,12 @@ def simm7s16 : Operand<i32> { let PrintMethod = "printImmScale<16>"; } +def am_indexed7s8 : ComplexPattern<i64, 2, "SelectAddrModeIndexed7S8", []>; +def am_indexed7s16 : ComplexPattern<i64, 2, "SelectAddrModeIndexed7S16", []>; +def am_indexed7s32 : ComplexPattern<i64, 2, "SelectAddrModeIndexed7S32", []>; +def am_indexed7s64 : ComplexPattern<i64, 2, "SelectAddrModeIndexed7S64", []>; +def am_indexed7s128 : ComplexPattern<i64, 2, "SelectAddrModeIndexed7S128", []>; + class AsmImmRange<int Low, int High> : AsmOperandClass { let Name = "Imm" # Low # "_" # High; let DiagnosticType = "InvalidImm" # Low # "_" # High; diff --git a/llvm/lib/Target/AArch64/AArch64InstrInfo.td b/llvm/lib/Target/AArch64/AArch64InstrInfo.td index d3c244977ec..5f01debf4ce 100644 --- a/llvm/lib/Target/AArch64/AArch64InstrInfo.td +++ b/llvm/lib/Target/AArch64/AArch64InstrInfo.td @@ -5825,6 +5825,45 @@ def : Pat<(i64 (int_aarch64_neon_srshl (i64 FPR64:$Rn), (i64 FPR64:$Rm))), def : Pat<(i64 (int_aarch64_neon_urshl (i64 FPR64:$Rn), (i64 FPR64:$Rm))), (URSHLv1i64 FPR64:$Rn, FPR64:$Rm)>; +// Patterns for nontemporal/no-allocate stores. 
+// We have to resort to tricks to turn a single-input store into a store pair, +// because there is no single-input nontemporal store, only STNP. +let Predicates = [IsLE] in { +let AddedComplexity = 15 in { +class NTStore128Pat<ValueType VT> : + Pat<(nontemporalstore (VT FPR128:$Rt), + (am_indexed7s64 GPR64sp:$Rn, simm7s8:$offset)), + (STNPDi (EXTRACT_SUBREG FPR128:$Rt, dsub), + (CPYi64 FPR128:$Rt, (i64 1)), + GPR64sp:$Rn, simm7s8:$offset)>; + +def : NTStore128Pat<v2i64>; +def : NTStore128Pat<v4i32>; +def : NTStore128Pat<v8i16>; +def : NTStore128Pat<v16i8>; + +class NTStore64Pat<ValueType VT> : + Pat<(nontemporalstore (VT FPR64:$Rt), + (am_indexed7s32 GPR64sp:$Rn, simm7s4:$offset)), + (STNPSi (EXTRACT_SUBREG FPR64:$Rt, ssub), + (CPYi32 (SUBREG_TO_REG (i64 0), FPR64:$Rt, dsub), (i64 1)), + GPR64sp:$Rn, simm7s4:$offset)>; + +// FIXME: Shouldn't v1f64 loads/stores be promoted to v1i64? +def : NTStore64Pat<v1f64>; +def : NTStore64Pat<v1i64>; +def : NTStore64Pat<v2i32>; +def : NTStore64Pat<v4i16>; +def : NTStore64Pat<v8i8>; + +def : Pat<(nontemporalstore GPR64:$Rt, + (am_indexed7s32 GPR64sp:$Rn, simm7s4:$offset)), + (STNPWi (EXTRACT_SUBREG GPR64:$Rt, sub_32), + (EXTRACT_SUBREG (UBFMXri GPR64:$Rt, 0, 31), sub_32), + GPR64sp:$Rn, simm7s4:$offset)>; +} // AddedComplexity=10 +} // Predicates = [IsLE] + // Tail call return handling. These are all compiler pseudo-instructions, // so no encoding information or anything like that. let isCall = 1, isTerminator = 1, isReturn = 1, isBarrier = 1, Uses = [SP] in { |

