Diffstat (limited to 'llvm/lib/Target')
-rw-r--r-- | llvm/lib/Target/AArch64/AArch64ISelLowering.cpp         | 59
-rw-r--r-- | llvm/lib/Target/AArch64/AArch64ISelLowering.h           |  8
-rw-r--r-- | llvm/lib/Target/AArch64/AArch64PreLegalizerCombiner.cpp | 22
-rw-r--r-- | llvm/lib/Target/Mips/MipsPreLegalizerCombiner.cpp       |  3
4 files changed, 88 insertions, 4 deletions
diff --git a/llvm/lib/Target/AArch64/AArch64ISelLowering.cpp b/llvm/lib/Target/AArch64/AArch64ISelLowering.cpp
index 7becc99fb5c..b78837aae83 100644
--- a/llvm/lib/Target/AArch64/AArch64ISelLowering.cpp
+++ b/llvm/lib/Target/AArch64/AArch64ISelLowering.cpp
@@ -1100,6 +1100,32 @@ bool AArch64TargetLowering::allowsMisalignedMemoryAccesses(
   return true;
 }
 
+// Same as above but handling LLTs instead.
+bool AArch64TargetLowering::allowsMisalignedMemoryAccesses(
+    LLT Ty, unsigned AddrSpace, unsigned Align, MachineMemOperand::Flags Flags,
+    bool *Fast) const {
+  if (Subtarget->requiresStrictAlign())
+    return false;
+
+  if (Fast) {
+    // Some CPUs are fine with unaligned stores except for 128-bit ones.
+    *Fast = !Subtarget->isMisaligned128StoreSlow() ||
+            Ty.getSizeInBytes() != 16 ||
+            // See comments in performSTORECombine() for more details about
+            // these conditions.
+
+            // Code that uses clang vector extensions can mark that it
+            // wants unaligned accesses to be treated as fast by
+            // underspecifying alignment to be 1 or 2.
+            Align <= 2 ||
+
+            // Disregard v2i64. Memcpy lowering produces those and splitting
+            // them regresses performance on micro-benchmarks and olden/bh.
+            Ty == LLT::vector(2, 64);
+  }
+  return true;
+}
+
 FastISel *
 AArch64TargetLowering::createFastISel(FunctionLoweringInfo &funcInfo,
                                       const TargetLibraryInfo *libInfo) const {
@@ -8739,6 +8765,39 @@ EVT AArch64TargetLowering::getOptimalMemOpType(
   return MVT::Other;
 }
 
+LLT AArch64TargetLowering::getOptimalMemOpLLT(
+    uint64_t Size, unsigned DstAlign, unsigned SrcAlign, bool IsMemset,
+    bool ZeroMemset, bool MemcpyStrSrc,
+    const AttributeList &FuncAttributes) const {
+  bool CanImplicitFloat =
+      !FuncAttributes.hasFnAttribute(Attribute::NoImplicitFloat);
+  bool CanUseNEON = Subtarget->hasNEON() && CanImplicitFloat;
+  bool CanUseFP = Subtarget->hasFPARMv8() && CanImplicitFloat;
+  // Only use AdvSIMD to implement memset of 32-byte and above. It would have
+  // taken one instruction to materialize the v2i64 zero and one store (with
+  // restrictive addressing mode). Just do i64 stores.
+  bool IsSmallMemset = IsMemset && Size < 32;
+  auto AlignmentIsAcceptable = [&](EVT VT, unsigned AlignCheck) {
+    if (memOpAlign(SrcAlign, DstAlign, AlignCheck))
+      return true;
+    bool Fast;
+    return allowsMisalignedMemoryAccesses(VT, 0, 1, MachineMemOperand::MONone,
+                                          &Fast) &&
+           Fast;
+  };
+
+  if (CanUseNEON && IsMemset && !IsSmallMemset &&
+      AlignmentIsAcceptable(MVT::v2i64, 16))
+    return LLT::vector(2, 64);
+  if (CanUseFP && !IsSmallMemset && AlignmentIsAcceptable(MVT::f128, 16))
+    return LLT::scalar(128);
+  if (Size >= 8 && AlignmentIsAcceptable(MVT::i64, 8))
+    return LLT::scalar(64);
+  if (Size >= 4 && AlignmentIsAcceptable(MVT::i32, 4))
+    return LLT::scalar(32);
+  return LLT();
+}
+
 // 12-bit optionally shifted immediates are legal for adds.
 bool AArch64TargetLowering::isLegalAddImmediate(int64_t Immed) const {
   if (Immed == std::numeric_limits<int64_t>::min()) {
diff --git a/llvm/lib/Target/AArch64/AArch64ISelLowering.h b/llvm/lib/Target/AArch64/AArch64ISelLowering.h
index 4421c31f65c..34e1fdf441e 100644
--- a/llvm/lib/Target/AArch64/AArch64ISelLowering.h
+++ b/llvm/lib/Target/AArch64/AArch64ISelLowering.h
@@ -272,6 +272,10 @@ public:
       EVT VT, unsigned AddrSpace = 0, unsigned Align = 1,
       MachineMemOperand::Flags Flags = MachineMemOperand::MONone,
       bool *Fast = nullptr) const override;
+  /// LLT variant.
+  bool allowsMisalignedMemoryAccesses(
+      LLT Ty, unsigned AddrSpace, unsigned Align, MachineMemOperand::Flags Flags,
+      bool *Fast = nullptr) const override;
 
   /// Provide custom lowering hooks for some operations.
   SDValue LowerOperation(SDValue Op, SelectionDAG &DAG) const override;
@@ -358,6 +362,10 @@ public:
                           bool IsMemset, bool ZeroMemset, bool MemcpyStrSrc,
                           const AttributeList &FuncAttributes) const override;
 
+  LLT getOptimalMemOpLLT(uint64_t Size, unsigned DstAlign, unsigned SrcAlign,
+                         bool IsMemset, bool ZeroMemset, bool MemcpyStrSrc,
+                         const AttributeList &FuncAttributes) const override;
+
   /// Return true if the addressing mode represented by AM is legal for this
   /// target, for a load/store of the specified type.
   bool isLegalAddressingMode(const DataLayout &DL, const AddrMode &AM, Type *Ty,
diff --git a/llvm/lib/Target/AArch64/AArch64PreLegalizerCombiner.cpp b/llvm/lib/Target/AArch64/AArch64PreLegalizerCombiner.cpp
index 5f7245bfbd7..5ec209ada17 100644
--- a/llvm/lib/Target/AArch64/AArch64PreLegalizerCombiner.cpp
+++ b/llvm/lib/Target/AArch64/AArch64PreLegalizerCombiner.cpp
@@ -28,9 +28,9 @@ using namespace MIPatternMatch;
 namespace {
 class AArch64PreLegalizerCombinerInfo : public CombinerInfo {
 public:
-  AArch64PreLegalizerCombinerInfo()
+  AArch64PreLegalizerCombinerInfo(bool EnableOpt, bool OptSize, bool MinSize)
       : CombinerInfo(/*AllowIllegalOps*/ true, /*ShouldLegalizeIllegal*/ false,
-                     /*LegalizerInfo*/ nullptr) {}
+                     /*LegalizerInfo*/ nullptr, EnableOpt, OptSize, MinSize) {}
   virtual bool combine(GISelChangeObserver &Observer, MachineInstr &MI,
                        MachineIRBuilder &B) const override;
 };
@@ -51,6 +51,18 @@ bool AArch64PreLegalizerCombinerInfo::combine(GISelChangeObserver &Observer,
   case TargetOpcode::G_SEXTLOAD:
   case TargetOpcode::G_ZEXTLOAD:
     return Helper.tryCombineExtendingLoads(MI);
+  case TargetOpcode::G_INTRINSIC_W_SIDE_EFFECTS:
+    switch (MI.getIntrinsicID()) {
+    case Intrinsic::memcpy:
+    case Intrinsic::memmove:
+    case Intrinsic::memset: {
+      // Try to inline memcpy type calls if optimizations are enabled.
+      return (EnableOpt && !EnableOptSize) ? Helper.tryCombineMemCpyFamily(MI)
+                                           : false;
+    }
+    default:
+      break;
+    }
   }
 
   return false;
@@ -89,7 +101,11 @@ bool AArch64PreLegalizerCombiner::runOnMachineFunction(MachineFunction &MF) {
           MachineFunctionProperties::Property::FailedISel))
     return false;
   auto *TPC = &getAnalysis<TargetPassConfig>();
-  AArch64PreLegalizerCombinerInfo PCInfo;
+  const Function &F = MF.getFunction();
+  bool EnableOpt =
+      MF.getTarget().getOptLevel() != CodeGenOpt::None && !skipFunction(F);
+  AArch64PreLegalizerCombinerInfo PCInfo(EnableOpt, F.hasOptSize(),
+                                         F.hasMinSize());
   Combiner C(PCInfo, TPC);
   return C.combineMachineInstrs(MF, /*CSEInfo*/ nullptr);
 }
diff --git a/llvm/lib/Target/Mips/MipsPreLegalizerCombiner.cpp b/llvm/lib/Target/Mips/MipsPreLegalizerCombiner.cpp
index 85076590d40..ace0735652b 100644
--- a/llvm/lib/Target/Mips/MipsPreLegalizerCombiner.cpp
+++ b/llvm/lib/Target/Mips/MipsPreLegalizerCombiner.cpp
@@ -27,7 +27,8 @@ class MipsPreLegalizerCombinerInfo : public CombinerInfo {
 public:
   MipsPreLegalizerCombinerInfo()
       : CombinerInfo(/*AllowIllegalOps*/ true, /*ShouldLegalizeIllegal*/ false,
-                     /*LegalizerInfo*/ nullptr) {}
+                     /*LegalizerInfo*/ nullptr, /*EnableOpt*/ false,
+                     /*EnableOptSize*/ false, /*EnableMinSize*/ false) {}
   virtual bool combine(GISelChangeObserver &Observer, MachineInstr &MI,
                        MachineIRBuilder &B) const override;
 };
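
For readers coming at this from the GlobalISel side, the following is a rough caller-side sketch of how the two new AArch64 hooks are meant to work together when inlining a memset: ask getOptimalMemOpLLT for the preferred store type, then keep it only if the destination alignment is sufficient or the target reports the misaligned access as fast. This is not code from this commit and not the actual CombinerHelper logic; the helper name pickMemsetStoreTy, the include paths, and the byte-store fallback are illustrative assumptions.

#include "AArch64ISelLowering.h"                // assumed target-local include path
#include "llvm/CodeGen/MachineMemOperand.h"
#include "llvm/IR/Attributes.h"
#include "llvm/Support/LowLevelTypeImpl.h"
using namespace llvm;

// Illustrative helper (made up for this sketch): pick a store type for an
// inlined memset of `Size` bytes into a destination aligned to `DstAlign`.
static LLT pickMemsetStoreTy(const AArch64TargetLowering &TLI, uint64_t Size,
                             unsigned DstAlign, const AttributeList &Attrs) {
  // Ask the target which type it prefers for this memset.
  LLT Ty = TLI.getOptimalMemOpLLT(Size, DstAlign, /*SrcAlign=*/0,
                                  /*IsMemset=*/true, /*ZeroMemset=*/true,
                                  /*MemcpyStrSrc=*/false, Attrs);
  if (!Ty.isValid())
    return LLT::scalar(8); // No preference; fall back to byte stores.

  // If the destination is under-aligned for the chosen type, keep it only
  // when the target says the misaligned store is actually fast.
  if (DstAlign < Ty.getSizeInBytes()) {
    bool Fast = false;
    if (!TLI.allowsMisalignedMemoryAccesses(Ty, /*AddrSpace=*/0, DstAlign,
                                            MachineMemOperand::MONone, &Fast) ||
        !Fast)
      return LLT::scalar(8);
  }
  return Ty;
}

Note that, with the combiner change above, this kind of inlining is only attempted when optimizations are enabled and the function is not built for size: runOnMachineFunction threads the opt level and the optsize/minsize attributes into AArch64PreLegalizerCombinerInfo, and the G_INTRINSIC_W_SIDE_EFFECTS case bails out unless EnableOpt is set and EnableOptSize is not.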