From 9f6c4d50b4b934e97e3e4bd1160df573b203ad0d Mon Sep 17 00:00:00 2001
From: Sanjay Patel <spatel@rotateright.com>
Date: Wed, 9 Mar 2016 22:23:33 +0000
Subject: [x86] fix cost model inaccuracy for vector memory ops

The irony of this patch is that one CPU that is affected is AMD Jaguar, and Jaguar
has a completely double-pumped AVX implementation. But getting the cost model to
reflect that is a much bigger problem. The small goal here is simply to improve on
the lie that !AVX2 == SandyBridge.

Differential Revision: http://reviews.llvm.org/D18000

llvm-svn: 263069
---
 llvm/lib/Target/X86/X86TargetTransformInfo.cpp | 8 ++++----
 1 file changed, 4 insertions(+), 4 deletions(-)

(limited to 'llvm/lib')

diff --git a/llvm/lib/Target/X86/X86TargetTransformInfo.cpp b/llvm/lib/Target/X86/X86TargetTransformInfo.cpp
index efa7feba4c0..ba977eb7058 100644
--- a/llvm/lib/Target/X86/X86TargetTransformInfo.cpp
+++ b/llvm/lib/Target/X86/X86TargetTransformInfo.cpp
@@ -983,10 +983,10 @@ int X86TTIImpl::getMemoryOpCost(unsigned Opcode, Type *Src, unsigned Alignment,
   // Each load/store unit costs 1.
   int Cost = LT.first * 1;
 
-  // On Sandybridge 256bit load/stores are double pumped
-  // (but not on Haswell).
-  if (LT.second.getSizeInBits() > 128 && !ST->hasAVX2())
-    Cost*=2;
+  // This isn't exactly right. We're using slow unaligned 32-byte accesses as a
+  // proxy for a double-pumped AVX memory interface such as on Sandybridge.
+  if (LT.second.getStoreSize() == 32 && ST->isUnalignedMem32Slow())
+    Cost *= 2;
 
   return Cost;
 }
-- 
cgit v1.2.3