summary | refs | log | tree | commit | diff | stats
diff options
context:
space:
mode:
author: Sanjay Patel <spatel@rotateright.com> 2015-08-21 20:39:17 +0000
committer: Sanjay Patel <spatel@rotateright.com> 2015-08-21 20:39:17 +0000
commit: dddad102418960e8dc187233fff849c1ce9badf4 (patch)
tree: b1d8f8f07a1a40db193c994aa66f60a5911ed453
parent: 07e7acb635e36bebbcee0e730fdb03b3fffbc99f (diff)
download: bcm5719-llvm-dddad102418960e8dc187233fff849c1ce9badf4.tar.gz
          bcm5719-llvm-dddad102418960e8dc187233fff849c1ce9badf4.zip
remove 'FeatureSlowUAMem' from AMD CPUs based on 10H micro-arch or later
See discussion in D12154 ( http://reviews.llvm.org/D12154 ), AMD Software Optimization Guides for 10H/12H/15H/16H, and Agner Fog's experimental data.

llvm-svn: 245733
-rw-r--r-- llvm/lib/Target/X86/X86.td 18
-rw-r--r-- llvm/test/CodeGen/X86/slow-unaligned-mem.ll 14
2 files changed, 14 insertions, 18 deletions
diff --git a/llvm/lib/Target/X86/X86.td b/llvm/lib/Target/X86/X86.td
index d081614a821..d00a1113e2e 100644
--- a/llvm/lib/Target/X86/X86.td
+++ b/llvm/lib/Target/X86/X86.td
@@ -433,21 +433,19 @@ def : Proc<"opteron-sse3", [FeatureSlowUAMem, FeatureSSE3, Feature3DNowA,
def : Proc<"athlon64-sse3", [FeatureSlowUAMem, FeatureSSE3, Feature3DNowA,
FeatureCMPXCHG16B, FeatureSlowBTMem,
FeatureSlowSHLD]>;
-def : Proc<"amdfam10", [FeatureSlowUAMem, FeatureSSE4A,
+def : Proc<"amdfam10", [FeatureSSE4A,
Feature3DNowA, FeatureCMPXCHG16B, FeatureLZCNT,
FeaturePOPCNT, FeatureSlowBTMem,
FeatureSlowSHLD]>;
-def : Proc<"barcelona", [FeatureSlowUAMem, FeatureSSE4A,
+def : Proc<"barcelona", [FeatureSSE4A,
Feature3DNowA, FeatureCMPXCHG16B, FeatureLZCNT,
FeaturePOPCNT, FeatureSlowBTMem,
FeatureSlowSHLD]>;
-// FIXME: We should remove 'FeatureSlowUAMem' from AMD chips under here.
-
// Bobcat
def : Proc<"btver1", [FeatureSSSE3, FeatureSSE4A, FeatureCMPXCHG16B,
FeaturePRFCHW, FeatureLZCNT, FeaturePOPCNT,
- FeatureSlowSHLD, FeatureSlowUAMem]>;
+ FeatureSlowSHLD]>;
// Jaguar
def : ProcessorModel<"btver2", BtVer2Model,
@@ -461,15 +459,13 @@ def : ProcessorModel<"btver2", BtVer2Model,
def : Proc<"bdver1", [FeatureXOP, FeatureFMA4, FeatureCMPXCHG16B,
FeatureAES, FeaturePRFCHW, FeaturePCLMUL,
FeatureAVX, FeatureSSE4A, FeatureLZCNT,
- FeaturePOPCNT, FeatureSlowSHLD,
- FeatureSlowUAMem]>;
+ FeaturePOPCNT, FeatureSlowSHLD]>;
// Piledriver
def : Proc<"bdver2", [FeatureXOP, FeatureFMA4, FeatureCMPXCHG16B,
FeatureAES, FeaturePRFCHW, FeaturePCLMUL,
FeatureAVX, FeatureSSE4A, FeatureF16C,
FeatureLZCNT, FeaturePOPCNT, FeatureBMI,
- FeatureTBM, FeatureFMA, FeatureSlowSHLD,
- FeatureSlowUAMem]>;
+ FeatureTBM, FeatureFMA, FeatureSlowSHLD]>;
// Steamroller
def : Proc<"bdver3", [FeatureXOP, FeatureFMA4, FeatureCMPXCHG16B,
@@ -477,7 +473,7 @@ def : Proc<"bdver3", [FeatureXOP, FeatureFMA4, FeatureCMPXCHG16B,
FeatureAVX, FeatureSSE4A, FeatureF16C,
FeatureLZCNT, FeaturePOPCNT, FeatureBMI,
FeatureTBM, FeatureFMA, FeatureSlowSHLD,
- FeatureFSGSBase, FeatureSlowUAMem]>;
+ FeatureFSGSBase]>;
// Excavator
def : Proc<"bdver4", [FeatureAVX2, FeatureXOP, FeatureFMA4,
@@ -485,7 +481,7 @@ def : Proc<"bdver4", [FeatureAVX2, FeatureXOP, FeatureFMA4,
FeaturePCLMUL, FeatureF16C, FeatureLZCNT,
FeaturePOPCNT, FeatureBMI, FeatureBMI2,
FeatureTBM, FeatureFMA, FeatureSSE4A,
- FeatureFSGSBase, FeatureSlowUAMem]>;
+ FeatureFSGSBase]>;
def : Proc<"geode", [FeatureSlowUAMem, Feature3DNowA]>;
diff --git a/llvm/test/CodeGen/X86/slow-unaligned-mem.ll b/llvm/test/CodeGen/X86/slow-unaligned-mem.ll
index e865ca16ca1..5c8166b63a3 100644
--- a/llvm/test/CodeGen/X86/slow-unaligned-mem.ll
+++ b/llvm/test/CodeGen/X86/slow-unaligned-mem.ll
@@ -39,14 +39,14 @@
; AMD chips with fast unaligned memory accesses
; FIXME: These are wrong except for btver2.
-; RUN: llc < %s -mtriple=i386-unknown-unknown -mcpu=amdfam10 2>&1 | FileCheck %s --check-prefix=SLOW
-; RUN: llc < %s -mtriple=i386-unknown-unknown -mcpu=barcelona 2>&1 | FileCheck %s --check-prefix=SLOW
-; RUN: llc < %s -mtriple=i386-unknown-unknown -mcpu=btver1 2>&1 | FileCheck %s --check-prefix=SLOW
+; RUN: llc < %s -mtriple=i386-unknown-unknown -mcpu=amdfam10 2>&1 | FileCheck %s --check-prefix=FAST
+; RUN: llc < %s -mtriple=i386-unknown-unknown -mcpu=barcelona 2>&1 | FileCheck %s --check-prefix=FAST
+; RUN: llc < %s -mtriple=i386-unknown-unknown -mcpu=btver1 2>&1 | FileCheck %s --check-prefix=FAST
; RUN: llc < %s -mtriple=i386-unknown-unknown -mcpu=btver2 2>&1 | FileCheck %s --check-prefix=FAST
-; RUN: llc < %s -mtriple=i386-unknown-unknown -mcpu=bdver1 2>&1 | FileCheck %s --check-prefix=SLOW
-; RUN: llc < %s -mtriple=i386-unknown-unknown -mcpu=bdver2 2>&1 | FileCheck %s --check-prefix=SLOW
-; RUN: llc < %s -mtriple=i386-unknown-unknown -mcpu=bdver3 2>&1 | FileCheck %s --check-prefix=SLOW
-; RUN: llc < %s -mtriple=i386-unknown-unknown -mcpu=bdver4 2>&1 | FileCheck %s --check-prefix=SLOW
+; RUN: llc < %s -mtriple=i386-unknown-unknown -mcpu=bdver1 2>&1 | FileCheck %s --check-prefix=FAST
+; RUN: llc < %s -mtriple=i386-unknown-unknown -mcpu=bdver2 2>&1 | FileCheck %s --check-prefix=FAST
+; RUN: llc < %s -mtriple=i386-unknown-unknown -mcpu=bdver3 2>&1 | FileCheck %s --check-prefix=FAST
+; RUN: llc < %s -mtriple=i386-unknown-unknown -mcpu=bdver4 2>&1 | FileCheck %s --check-prefix=FAST
; Other chips with slow unaligned memory accesses
OpenPOWER on IntegriCloud