author    Sanjay Patel <spatel@rotateright.com>  2016-03-31 17:30:06 +0000
committer Sanjay Patel <spatel@rotateright.com>  2016-03-31 17:30:06 +0000
commit    92d5ea5e07bf122b10500715cd74eed963cf56cc (patch)
tree      6b54f937b61d242102ab04439c74f34fcfc0be52 /llvm/lib/Target/X86
parent    ab962acd5940bb38810f4b7993166058ea8865f4 (diff)
[x86] use SSE/AVX ops for non-zero memsets (PR27100)
Move the memset check down to the CPU-with-slow-SSE-unaligned-memops case: this allows
fast targets to take advantage of SSE/AVX instructions and prevents slow targets from
stepping into a codegen sinkhole while trying to splat a byte into an XMM reg.

Follow-on bugs exposed by the current codegen are:
https://llvm.org/bugs/show_bug.cgi?id=27141
https://llvm.org/bugs/show_bug.cgi?id=27143

Differential Revision: http://reviews.llvm.org/D18566

llvm-svn: 265029
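As a minimal sketch of the kind of source this change affects (the function name,
buffer, and fill byte here are hypothetical, chosen only for illustration):

#include <cstring>

// A non-zero memset. Before this patch, the (!IsMemset || ZeroMemset) guard
// kept such calls out of the vector path entirely; afterwards, targets with
// fast unaligned 16-byte memory ops can lower them with SSE/AVX stores.
void fillPadding(char *Buf) {
  std::memset(Buf, 0x55, 32); // 32-byte memset with a non-zero value
}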
Diffstat (limited to 'llvm/lib/Target/X86')
-rw-r--r-- llvm/lib/Target/X86/X86ISelLowering.cpp | 12
1 file changed, 7 insertions(+), 5 deletions(-)
diff --git a/llvm/lib/Target/X86/X86ISelLowering.cpp b/llvm/lib/Target/X86/X86ISelLowering.cpp
index 1032137e8f6..329cdc0a53c 100644
--- a/llvm/lib/Target/X86/X86ISelLowering.cpp
+++ b/llvm/lib/Target/X86/X86ISelLowering.cpp
@@ -2025,8 +2025,7 @@ X86TargetLowering::getOptimalMemOpType(uint64_t Size,
bool MemcpyStrSrc,
MachineFunction &MF) const {
const Function *F = MF.getFunction();
- if ((!IsMemset || ZeroMemset) &&
- !F->hasFnAttribute(Attribute::NoImplicitFloat)) {
+ if (!F->hasFnAttribute(Attribute::NoImplicitFloat)) {
if (Size >= 16 &&
(!Subtarget.isUnalignedMem16Slow() ||
((DstAlign == 0 || DstAlign >= 16) &&
@@ -2042,11 +2041,14 @@ X86TargetLowering::getOptimalMemOpType(uint64_t Size,
return MVT::v4i32;
if (Subtarget.hasSSE1())
return MVT::v4f32;
- } else if (!MemcpyStrSrc && Size >= 8 &&
- !Subtarget.is64Bit() &&
- Subtarget.hasSSE2()) {
+ } else if ((!IsMemset || ZeroMemset) && !MemcpyStrSrc && Size >= 8 &&
+ !Subtarget.is64Bit() && Subtarget.hasSSE2()) {
// Do not use f64 to lower memcpy if source is string constant. It's
// better to use i32 to avoid the loads.
+ // Also, do not use f64 to lower memset unless this is a memset of zeros.
+ // The gymnastics of splatting a byte value into an XMM register and then
+ // only using 8-byte stores (because this is a CPU with slow unaligned
+ // 16-byte accesses) makes that a loser.
return MVT::f64;
}
}
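For readability, the predicate installed by the second hunk can be restated as a
self-contained condition. shouldUseF64ForMemOp is a hypothetical helper written
only to mirror the diff above; it is not an LLVM API:

#include <cstdint>

// Condensed restatement of the condition guarding the MVT::f64 fallback.
bool shouldUseF64ForMemOp(bool IsMemset, bool ZeroMemset, bool MemcpyStrSrc,
                          uint64_t Size, bool Is64Bit, bool HasSSE2) {
  // f64 stays profitable only for memcpy from non-string sources and for
  // memsets of zero; splatting a non-zero byte into an XMM register just to
  // issue 8-byte stores on a slow-unaligned-16-byte CPU is a net loss.
  return (!IsMemset || ZeroMemset) && !MemcpyStrSrc && Size >= 8 &&
         !Is64Bit && HasSSE2;
}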