author    Sanjay Patel <spatel@rotateright.com>  2016-03-31 17:30:06 +0000
committer Sanjay Patel <spatel@rotateright.com>  2016-03-31 17:30:06 +0000
commit    92d5ea5e07bf122b10500715cd74eed963cf56cc (patch)
tree      6b54f937b61d242102ab04439c74f34fcfc0be52 /llvm/lib/Target/X86
parent    ab962acd5940bb38810f4b7993166058ea8865f4 (diff)
[x86] use SSE/AVX ops for non-zero memsets (PR27100)
Move the memset check down to the CPU-with-slow-SSE-unaligned-memops case: this allows
fast targets to take advantage of SSE/AVX instructions and prevents slow targets from
stepping into a codegen sinkhole while trying to splat a byte into an XMM reg.

Follow-on bugs exposed by the current codegen are:
https://llvm.org/bugs/show_bug.cgi?id=27141
https://llvm.org/bugs/show_bug.cgi?id=27143

Differential Revision: http://reviews.llvm.org/D18566

llvm-svn: 265029
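As a minimal sketch of the kind of source this change affects (the function name,
buffer, and fill byte here are hypothetical, chosen only for illustration):

#include <cstring>

// A non-zero memset. Before this patch, the (!IsMemset || ZeroMemset) guard
// kept such calls out of the vector path entirely; afterwards, targets with
// fast unaligned 16-byte memory ops can lower them with SSE/AVX stores.
void fillPadding(char *Buf) {
  std::memset(Buf, 0x55, 32); // 32-byte memset with a non-zero value
}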
Diffstat (limited to 'llvm/lib/Target/X86')
-rw-r--r-- llvm/lib/Target/X86/X86ISelLowering.cpp | 12
1 file changed, 7 insertions(+), 5 deletions(-)
diff --git a/llvm/lib/Target/X86/X86ISelLowering.cpp b/llvm/lib/Target/X86/X86ISelLowering.cpp
index 1032137e8f6..329cdc0a53c 100644
--- a/llvm/lib/Target/X86/X86ISelLowering.cpp
+++ b/llvm/lib/Target/X86/X86ISelLowering.cpp
@@ -2025,8 +2025,7 @@ X86TargetLowering::getOptimalMemOpType(uint64_t Size,
bool MemcpyStrSrc,
MachineFunction &MF) const {
const Function *F = MF.getFunction();
- if ((!IsMemset || ZeroMemset) &&
- !F->hasFnAttribute(Attribute::NoImplicitFloat)) {
+ if (!F->hasFnAttribute(Attribute::NoImplicitFloat)) {
if (Size >= 16 &&
(!Subtarget.isUnalignedMem16Slow() ||
((DstAlign == 0 || DstAlign >= 16) &&
@@ -2042,11 +2041,14 @@ X86TargetLowering::getOptimalMemOpType(uint64_t Size,
return MVT::v4i32;
if (Subtarget.hasSSE1())
return MVT::v4f32;
- } else if (!MemcpyStrSrc && Size >= 8 &&
- !Subtarget.is64Bit() &&
- Subtarget.hasSSE2()) {
+ } else if ((!IsMemset || ZeroMemset) && !MemcpyStrSrc && Size >= 8 &&
+ !Subtarget.is64Bit() && Subtarget.hasSSE2()) {
// Do not use f64 to lower memcpy if source is string constant. It's
// better to use i32 to avoid the loads.
+ // Also, do not use f64 to lower memset unless this is a memset of zeros.
+ // The gymnastics of splatting a byte value into an XMM register and then
+ // only using 8-byte stores (because this is a CPU with slow unaligned
+ // 16-byte accesses) makes that a loser.
return MVT::f64;
}
}
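For readability, the predicate installed by the second hunk can be restated as a
self-contained condition. shouldUseF64ForMemOp is a hypothetical helper written
only to mirror the diff above; it is not an LLVM API:

#include <cstdint>

// Condensed restatement of the condition guarding the MVT::f64 fallback.
bool shouldUseF64ForMemOp(bool IsMemset, bool ZeroMemset, bool MemcpyStrSrc,
                          uint64_t Size, bool Is64Bit, bool HasSSE2) {
  // f64 stays profitable only for memcpy from non-string sources and for
  // memsets of zero; splatting a non-zero byte into an XMM register just to
  // issue 8-byte stores on a slow-unaligned-16-byte CPU is a net loss.
  return (!IsMemset || ZeroMemset) && !MemcpyStrSrc && Size >= 8 &&
         !Is64Bit && HasSSE2;
}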