author:    Sanjay Patel <spatel@rotateright.com>  2016-03-31 17:30:06 +0000
committer: Sanjay Patel <spatel@rotateright.com>  2016-03-31 17:30:06 +0000
commit:    92d5ea5e07bf122b10500715cd74eed963cf56cc
tree:      6b54f937b61d242102ab04439c74f34fcfc0be52 /llvm/lib/Target/X86
parent:    ab962acd5940bb38810f4b7993166058ea8865f4
[x86] use SSE/AVX ops for non-zero memsets (PR27100)
Move the memset check down to the CPU-with-slow-SSE-unaligned-memops case: this allows fast
targets to take advantage of SSE/AVX instructions and prevents slow targets from stepping
into a codegen sinkhole while trying to splat a byte into an XMM reg.
Follow-on bugs exposed by the current codegen are:
https://llvm.org/bugs/show_bug.cgi?id=27141
https://llvm.org/bugs/show_bug.cgi?id=27143
Differential Revision: http://reviews.llvm.org/D18566
llvm-svn: 265029
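To make the new decision order easier to follow, here is a minimal, self-contained sketch of the hook's logic after this patch. The parameter names mirror getOptimalMemOpType as it appears in the diff below, but SubtargetModel, MemOpTy, and pickMemOpType are illustrative stand-ins, and the NoImplicitFloat check and the AVX/AVX-512 paths are omitted; treat it as a sketch, not the actual LLVM code.

#include <cstdint>

// Hypothetical stand-in for the X86 subtarget; the query names loosely
// mirror the Subtarget calls visible in the diff below.
struct SubtargetModel {
  bool HasSSE1 = false;
  bool HasSSE2 = false;
  bool Is64Bit = false;
  bool UnalignedMem16Slow = false;
};

// What the lowering hook may return in this simplified model.
enum class MemOpTy { V4I32, V4F32, F64, Default };

// Post-patch decision order: the (!IsMemset || ZeroMemset) restriction now
// guards only the 8-byte f64 fallback, so a non-zero memset on a target with
// fast unaligned 16-byte ops can still be given a vector type.
MemOpTy pickMemOpType(uint64_t Size, unsigned DstAlign, unsigned SrcAlign,
                      bool IsMemset, bool ZeroMemset, bool MemcpyStrSrc,
                      const SubtargetModel &ST) {
  if (Size >= 16 && (!ST.UnalignedMem16Slow ||
                     ((DstAlign == 0 || DstAlign >= 16) &&
                      (SrcAlign == 0 || SrcAlign >= 16)))) {
    if (ST.HasSSE2)
      return MemOpTy::V4I32; // 16-byte integer vector ops
    if (ST.HasSSE1)
      return MemOpTy::V4F32; // 16-byte float vector ops
  } else if ((!IsMemset || ZeroMemset) && !MemcpyStrSrc && Size >= 8 &&
             !ST.Is64Bit && ST.HasSSE2) {
    return MemOpTy::F64;     // 8-byte stores; skipped for non-zero memsets
  }
  return MemOpTy::Default;   // fall back to scalar integer stores
}

The key point is that only the slow-unaligned-16-byte fallback retains the memset restriction; the fast path no longer excludes non-zero memsets at all.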
Diffstat (limited to 'llvm/lib/Target/X86')
-rw-r--r-- | llvm/lib/Target/X86/X86ISelLowering.cpp | 12 |
1 file changed, 7 insertions, 5 deletions
diff --git a/llvm/lib/Target/X86/X86ISelLowering.cpp b/llvm/lib/Target/X86/X86ISelLowering.cpp
index 1032137e8f6..329cdc0a53c 100644
--- a/llvm/lib/Target/X86/X86ISelLowering.cpp
+++ b/llvm/lib/Target/X86/X86ISelLowering.cpp
@@ -2025,8 +2025,7 @@ X86TargetLowering::getOptimalMemOpType(uint64_t Size,
                                        bool MemcpyStrSrc,
                                        MachineFunction &MF) const {
   const Function *F = MF.getFunction();
-  if ((!IsMemset || ZeroMemset) &&
-      !F->hasFnAttribute(Attribute::NoImplicitFloat)) {
+  if (!F->hasFnAttribute(Attribute::NoImplicitFloat)) {
     if (Size >= 16 &&
         (!Subtarget.isUnalignedMem16Slow() ||
          ((DstAlign == 0 || DstAlign >= 16) &&
@@ -2042,11 +2041,14 @@ X86TargetLowering::getOptimalMemOpType(uint64_t Size,
         return MVT::v4i32;
       if (Subtarget.hasSSE1())
         return MVT::v4f32;
-    } else if (!MemcpyStrSrc && Size >= 8 &&
-               !Subtarget.is64Bit() &&
-               Subtarget.hasSSE2()) {
+    } else if ((!IsMemset || ZeroMemset) && !MemcpyStrSrc && Size >= 8 &&
+               !Subtarget.is64Bit() && Subtarget.hasSSE2()) {
       // Do not use f64 to lower memcpy if source is string constant. It's
       // better to use i32 to avoid the loads.
+      // Also, do not use f64 to lower memset unless this is a memset of zeros.
+      // The gymnastics of splatting a byte value into an XMM register and then
+      // only using 8-byte stores (because this is a CPU with slow unaligned
+      // 16-byte accesses) makes that a loser.
       return MVT::f64;
     }
   }
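For context, the affected pattern is a fixed-size, non-zero memset such as the hypothetical example below (fillBuffer is made up, not taken from this commit or from PR27100). With this change, a subtarget with fast unaligned 16-byte accesses may expand such a call through a vector type rather than being forced onto scalar stores; the exact codegen still depends on the subtarget.

#include <cstring>

// Hypothetical example of a non-zero memset. Before this change, the
// (!IsMemset || ZeroMemset) guard kept getOptimalMemOpType from returning
// a vector type for it; after the change, targets with fast unaligned
// 16-byte operations are free to expand it with SSE/AVX stores.
void fillBuffer(char *Dst) {
  std::memset(Dst, 0x2A, 32);
}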