diff options
| author | Sanjay Patel <spatel@rotateright.com> | 2015-08-21 20:39:17 +0000 |
|---|---|---|
| committer | Sanjay Patel <spatel@rotateright.com> | 2015-08-21 20:39:17 +0000 |
| commit | dddad102418960e8dc187233fff849c1ce9badf4 (patch) | |
| tree | b1d8f8f07a1a40db193c994aa66f60a5911ed453 | |
| parent | 07e7acb635e36bebbcee0e730fdb03b3fffbc99f (diff) | |
| download | bcm5719-llvm-dddad102418960e8dc187233fff849c1ce9badf4.tar.gz bcm5719-llvm-dddad102418960e8dc187233fff849c1ce9badf4.zip | |
remove 'FeatureSlowUAMem' from AMD CPUs based on 10H micro-arch or later
See discussion in D12154 ( http://reviews.llvm.org/D12154 ), AMD Software
Optimization Guides for 10H/12H/15H/16H, and Agner Fog's experimental data.
llvm-svn: 245733
| -rw-r--r-- | llvm/lib/Target/X86/X86.td | 18 | ||||
| -rw-r--r-- | llvm/test/CodeGen/X86/slow-unaligned-mem.ll | 14 |
2 files changed, 14 insertions, 18 deletions
```diff
diff --git a/llvm/lib/Target/X86/X86.td b/llvm/lib/Target/X86/X86.td
index d081614a821..d00a1113e2e 100644
--- a/llvm/lib/Target/X86/X86.td
+++ b/llvm/lib/Target/X86/X86.td
@@ -433,21 +433,19 @@ def : Proc<"opteron-sse3", [FeatureSlowUAMem, FeatureSSE3, Feature3DNowA,
 def : Proc<"athlon64-sse3", [FeatureSlowUAMem, FeatureSSE3, Feature3DNowA,
                              FeatureCMPXCHG16B, FeatureSlowBTMem,
                              FeatureSlowSHLD]>;
-def : Proc<"amdfam10", [FeatureSlowUAMem, FeatureSSE4A,
+def : Proc<"amdfam10", [FeatureSSE4A,
                         Feature3DNowA, FeatureCMPXCHG16B, FeatureLZCNT,
                         FeaturePOPCNT, FeatureSlowBTMem,
                         FeatureSlowSHLD]>;
-def : Proc<"barcelona", [FeatureSlowUAMem, FeatureSSE4A,
+def : Proc<"barcelona", [FeatureSSE4A,
                          Feature3DNowA, FeatureCMPXCHG16B, FeatureLZCNT,
                          FeaturePOPCNT, FeatureSlowBTMem,
                          FeatureSlowSHLD]>;
 
-// FIXME: We should remove 'FeatureSlowUAMem' from AMD chips under here.
-
 // Bobcat
 def : Proc<"btver1", [FeatureSSSE3, FeatureSSE4A, FeatureCMPXCHG16B,
                       FeaturePRFCHW, FeatureLZCNT, FeaturePOPCNT,
-                      FeatureSlowSHLD, FeatureSlowUAMem]>;
+                      FeatureSlowSHLD]>;
 
 // Jaguar
 def : ProcessorModel<"btver2", BtVer2Model,
@@ -461,15 +459,13 @@ def : ProcessorModel<"btver2", BtVer2Model,
 def : Proc<"bdver1", [FeatureXOP, FeatureFMA4, FeatureCMPXCHG16B,
                       FeatureAES, FeaturePRFCHW, FeaturePCLMUL,
                       FeatureAVX, FeatureSSE4A, FeatureLZCNT,
-                      FeaturePOPCNT, FeatureSlowSHLD,
-                      FeatureSlowUAMem]>;
+                      FeaturePOPCNT, FeatureSlowSHLD]>;
 // Piledriver
 def : Proc<"bdver2", [FeatureXOP, FeatureFMA4, FeatureCMPXCHG16B,
                       FeatureAES, FeaturePRFCHW, FeaturePCLMUL,
                       FeatureAVX, FeatureSSE4A, FeatureF16C,
                       FeatureLZCNT, FeaturePOPCNT, FeatureBMI,
-                      FeatureTBM, FeatureFMA, FeatureSlowSHLD,
-                      FeatureSlowUAMem]>;
+                      FeatureTBM, FeatureFMA, FeatureSlowSHLD]>;
 
 // Steamroller
 def : Proc<"bdver3", [FeatureXOP, FeatureFMA4, FeatureCMPXCHG16B,
@@ -477,7 +473,7 @@ def : Proc<"bdver3", [FeatureXOP, FeatureFMA4, FeatureCMPXCHG16B,
                       FeatureAVX, FeatureSSE4A, FeatureF16C,
                       FeatureLZCNT, FeaturePOPCNT, FeatureBMI,
                       FeatureTBM, FeatureFMA, FeatureSlowSHLD,
-                      FeatureFSGSBase, FeatureSlowUAMem]>;
+                      FeatureFSGSBase]>;
 
 // Excavator
 def : Proc<"bdver4", [FeatureAVX2, FeatureXOP, FeatureFMA4,
@@ -485,7 +481,7 @@ def : Proc<"bdver4", [FeatureAVX2, FeatureXOP, FeatureFMA4,
                       FeaturePCLMUL, FeatureF16C, FeatureLZCNT,
                       FeaturePOPCNT, FeatureBMI, FeatureBMI2,
                       FeatureTBM, FeatureFMA, FeatureSSE4A,
-                      FeatureFSGSBase, FeatureSlowUAMem]>;
+                      FeatureFSGSBase]>;
 
 def : Proc<"geode", [FeatureSlowUAMem, Feature3DNowA]>;
 
diff --git a/llvm/test/CodeGen/X86/slow-unaligned-mem.ll b/llvm/test/CodeGen/X86/slow-unaligned-mem.ll
index e865ca16ca1..5c8166b63a3 100644
--- a/llvm/test/CodeGen/X86/slow-unaligned-mem.ll
+++ b/llvm/test/CodeGen/X86/slow-unaligned-mem.ll
@@ -39,14 +39,14 @@
 ; AMD chips with fast unaligned memory accesses
 ; FIXME: These are wrong except for btver2.
 
-; RUN: llc < %s -mtriple=i386-unknown-unknown -mcpu=amdfam10 2>&1 | FileCheck %s --check-prefix=SLOW
-; RUN: llc < %s -mtriple=i386-unknown-unknown -mcpu=barcelona 2>&1 | FileCheck %s --check-prefix=SLOW
-; RUN: llc < %s -mtriple=i386-unknown-unknown -mcpu=btver1 2>&1 | FileCheck %s --check-prefix=SLOW
+; RUN: llc < %s -mtriple=i386-unknown-unknown -mcpu=amdfam10 2>&1 | FileCheck %s --check-prefix=FAST
+; RUN: llc < %s -mtriple=i386-unknown-unknown -mcpu=barcelona 2>&1 | FileCheck %s --check-prefix=FAST
+; RUN: llc < %s -mtriple=i386-unknown-unknown -mcpu=btver1 2>&1 | FileCheck %s --check-prefix=FAST
 ; RUN: llc < %s -mtriple=i386-unknown-unknown -mcpu=btver2 2>&1 | FileCheck %s --check-prefix=FAST
-; RUN: llc < %s -mtriple=i386-unknown-unknown -mcpu=bdver1 2>&1 | FileCheck %s --check-prefix=SLOW
-; RUN: llc < %s -mtriple=i386-unknown-unknown -mcpu=bdver2 2>&1 | FileCheck %s --check-prefix=SLOW
-; RUN: llc < %s -mtriple=i386-unknown-unknown -mcpu=bdver3 2>&1 | FileCheck %s --check-prefix=SLOW
-; RUN: llc < %s -mtriple=i386-unknown-unknown -mcpu=bdver4 2>&1 | FileCheck %s --check-prefix=SLOW
+; RUN: llc < %s -mtriple=i386-unknown-unknown -mcpu=bdver1 2>&1 | FileCheck %s --check-prefix=FAST
+; RUN: llc < %s -mtriple=i386-unknown-unknown -mcpu=bdver2 2>&1 | FileCheck %s --check-prefix=FAST
+; RUN: llc < %s -mtriple=i386-unknown-unknown -mcpu=bdver3 2>&1 | FileCheck %s --check-prefix=FAST
+; RUN: llc < %s -mtriple=i386-unknown-unknown -mcpu=bdver4 2>&1 | FileCheck %s --check-prefix=FAST
 
 ; Other chips with slow unaligned memory accesses
 
```

