[X86] AMD znver2 enablement

This patch enables the following 1) AMD family 17h "znver2" tune flag (-march, -mcpu). 2) ISAs that are enabled for "znver2" architecture. 3) For the time being, it uses the znver1 scheduler model. 4) Tests are updated. 5) Scheduler descriptions are yet to be put in place. Reviewers: craig.topper Differential Revision: https://reviews.llvm.org/D58343 llvm-svn: 354897
author: Ganesh Gopalasubramanian <Ganesh.Gopalasubramanian@amd.com> 2019-02-26 16:55:10 +0000
committer: Ganesh Gopalasubramanian <Ganesh.Gopalasubramanian@amd.com> 2019-02-26 16:55:10 +0000
commit: e172d7008d0c12eaa9ee2aee16ee4e13c176553c (patch)
tree: 8beb77f4f6a28e4e3c230c29202395f175c7a3ac /llvm
parent: c110b5b69f19700939a56d08218dfb0abb577af9 (diff)
download: bcm5719-llvm-e172d7008d0c12eaa9ee2aee16ee4e13c176553c.tar.gz
bcm5719-llvm-e172d7008d0c12eaa9ee2aee16ee4e13c176553c.zip
7 files changed, 31 insertions, 5 deletions
diff --git a/llvm/include/llvm/Support/X86TargetParser.def b/llvm/include/llvm/Support/X86TargetParser.def
index fb82228c4df..2b5936bb260 100644
--- a/llvm/include/llvm/Support/X86TargetParser.def
+++ b/llvm/include/llvm/Support/X86TargetParser.def
@@ -98,6 +98,7 @@ X86_CPU_SUBTYPE_COMPAT("skylake-avx512", INTEL_COREI7_SKYLAKE_AVX512, "skylake-a
 X86_CPU_SUBTYPE_COMPAT("cannonlake",     INTEL_COREI7_CANNONLAKE,     "cannonlake")
 X86_CPU_SUBTYPE_COMPAT("icelake-client", INTEL_COREI7_ICELAKE_CLIENT, "icelake-client")
 X86_CPU_SUBTYPE_COMPAT("icelake-server", INTEL_COREI7_ICELAKE_SERVER, "icelake-server")
+X86_CPU_SUBTYPE_COMPAT("znver2",         AMDFAM17H_ZNVER2,            "znver2")
 // Entries below this are not in libgcc/compiler-rt.
 X86_CPU_SUBTYPE       ("core2",          INTEL_CORE2_65)
 X86_CPU_SUBTYPE       ("penryn",         INTEL_CORE2_45)
diff --git a/llvm/lib/Support/Host.cpp b/llvm/lib/Support/Host.cpp
index 35bc9722241..52e7080e744 100644
--- a/llvm/lib/Support/Host.cpp
+++ b/llvm/lib/Support/Host.cpp
@@ -916,7 +916,14 @@ static void getAMDProcessorTypeAndSubtype(unsigned Family, unsigned Model,
     break; // "btver2"
   case 23:
     *Type = X86::AMDFAM17H;
-    *Subtype = X86::AMDFAM17H_ZNVER1;
+    if (Model >= 0x30 && Model <= 0x3f) {
+      *Subtype = X86::AMDFAM17H_ZNVER2;
+      break; // "znver2"; 30h-3fh: Zen2
+    }
+    if (Model <= 0x0f) {
+      *Subtype = X86::AMDFAM17H_ZNVER1;
+      break; // "znver1"; 00h-0Fh: Zen1
+    }
     break;
   default:
     break; // "generic"
diff --git a/llvm/lib/Target/X86/X86.td b/llvm/lib/Target/X86/X86.td
index 502278bd8a5..094790023b1 100644
--- a/llvm/lib/Target/X86/X86.td
+++ b/llvm/lib/Target/X86/X86.td
@@ -1143,8 +1143,8 @@ def : Proc<"bdver4", [
   FeatureMacroFusion
 ]>;
 
-// Znver1
-def: ProcessorModel<"znver1", Znver1Model, [
+// AMD Zen Processors common ISAs
+def ZNFeatures : ProcessorFeatures<[], [
   FeatureADX,
   FeatureAES,
   FeatureAVX2,
@@ -1183,6 +1183,19 @@ def: ProcessorModel<"znver1", Znver1Model, [
   FeatureXSAVEOPT,
   FeatureXSAVES]>;
 
+class Znver1Proc<string Name> : ProcModel<Name, Znver1Model,
+                                         ZNFeatures.Value, [
+]>;
+def : Znver1Proc<"znver1">;
+
+class Znver2Proc<string Name> : ProcModel<Name, Znver1Model,
+                                         ZNFeatures.Value, [
+  FeatureCLWB,
+  FeatureRDPID,
+  FeatureWBNOINVD
+]>;
+def : Znver2Proc<"znver2">;
+
 def : Proc<"geode",           [FeatureX87, FeatureSlowUAMem16, Feature3DNowA]>;
 
 def : Proc<"winchip-c6",      [FeatureX87, FeatureSlowUAMem16, FeatureMMX]>;
diff --git a/llvm/test/CodeGen/X86/cpus-amd.ll b/llvm/test/CodeGen/X86/cpus-amd.ll
index 5e90048d389..c5716d68e63 100644
--- a/llvm/test/CodeGen/X86/cpus-amd.ll
+++ b/llvm/test/CodeGen/X86/cpus-amd.ll
@@ -26,6 +26,7 @@
 ; RUN: llc < %s -o /dev/null -mtriple=x86_64-unknown-unknown -mcpu=btver1 2>&1 | FileCheck %s --check-prefix=CHECK-NO-ERROR --allow-empty
 ; RUN: llc < %s -o /dev/null -mtriple=x86_64-unknown-unknown -mcpu=btver2 2>&1 | FileCheck %s --check-prefix=CHECK-NO-ERROR --allow-empty
 ; RUN: llc < %s -o /dev/null -mtriple=x86_64-unknown-unknown -mcpu=znver1 2>&1 | FileCheck %s --check-prefix=CHECK-NO-ERROR --allow-empty
+; RUN: llc < %s -o /dev/null -mtriple=x86_64-unknown-unknown -mcpu=znver2 2>&1 | FileCheck %s --check-prefix=CHECK-NO-ERROR --allow-empty
 
 define void @foo() {
   ret void
diff --git a/llvm/test/CodeGen/X86/lzcnt-zext-cmp.ll b/llvm/test/CodeGen/X86/lzcnt-zext-cmp.ll
index 90abd2d06ba..8d43a1b7323 100644
--- a/llvm/test/CodeGen/X86/lzcnt-zext-cmp.ll
+++ b/llvm/test/CodeGen/X86/lzcnt-zext-cmp.ll
@@ -5,6 +5,8 @@
 ; RUN: llc < %s -mtriple=x86_64-pc-linux -mcpu=btver2 -mattr=-fast-lzcnt | FileCheck --check-prefix=ALL --check-prefix=NOFASTLZCNT %s
 ; RUN: llc < %s -mtriple=x86_64-pc-linux -mcpu=znver1 | FileCheck --check-prefix=ALL --check-prefix=FASTLZCNT %s
 ; RUN: llc < %s -mtriple=x86_64-pc-linux -mcpu=znver1 -mattr=-fast-lzcnt | FileCheck --check-prefix=ALL --check-prefix=NOFASTLZCNT %s
+; RUN: llc < %s -mtriple=x86_64-pc-linux -mcpu=znver2 | FileCheck --check-prefix=ALL --check-prefix=FASTLZCNT %s
+; RUN: llc < %s -mtriple=x86_64-pc-linux -mcpu=znver2 -mattr=-fast-lzcnt | FileCheck --check-prefix=ALL --check-prefix=NOFASTLZCNT %s
 
 ; Test one 32-bit input, output is 32-bit, no transformations expected.
 define i32 @test_zext_cmp0(i32 %a) {
diff --git a/llvm/test/CodeGen/X86/slow-unaligned-mem.ll b/llvm/test/CodeGen/X86/slow-unaligned-mem.ll
index 54c248f3b04..f2c7c2fa4a5 100644
--- a/llvm/test/CodeGen/X86/slow-unaligned-mem.ll
+++ b/llvm/test/CodeGen/X86/slow-unaligned-mem.ll
@@ -47,6 +47,7 @@
 ; RUN: llc < %s -mtriple=i386-unknown-unknown -mcpu=bdver3        2>&1 | FileCheck %s --check-prefix=FAST
 ; RUN: llc < %s -mtriple=i386-unknown-unknown -mcpu=bdver4        2>&1 | FileCheck %s --check-prefix=FAST
 ; RUN: llc < %s -mtriple=i386-unknown-unknown -mcpu=znver1        2>&1 | FileCheck %s --check-prefix=FAST
+; RUN: llc < %s -mtriple=i386-unknown-unknown -mcpu=znver2        2>&1 | FileCheck %s --check-prefix=FAST
 
 ; Other chips with slow unaligned memory accesses
 
diff --git a/llvm/test/CodeGen/X86/x86-64-double-shifts-var.ll b/llvm/test/CodeGen/X86/x86-64-double-shifts-var.ll
index 10cb1843f3e..513e7774f6a 100644
--- a/llvm/test/CodeGen/X86/x86-64-double-shifts-var.ll
+++ b/llvm/test/CodeGen/X86/x86-64-double-shifts-var.ll
@@ -13,8 +13,9 @@
 ; RUN: llc < %s -mtriple=x86_64-- -mcpu=bdver3 | FileCheck %s
 ; RUN: llc < %s -mtriple=x86_64-- -mcpu=bdver4 | FileCheck %s
 ; RUN: llc < %s -mtriple=x86_64-- -mcpu=znver1 | FileCheck %s
+; RUN: llc < %s -mtriple=x86_64-- -mcpu=znver2 | FileCheck %s
 
-; Verify that for the X86_64 processors that are known to have poor latency 
+; Verify that for the X86_64 processors that are known to have poor latency
 ; double precision shift instructions we do not generate 'shld' or 'shrd'
 ; instructions.
 
@@ -25,7 +26,7 @@
 
 define i64 @lshift(i64 %a, i64 %b, i32 %c) nounwind readnone {
 entry:
-; CHECK-NOT: shld 
+; CHECK-NOT: shld
   %sh_prom = zext i32 %c to i64
   %shl = shl i64 %a, %sh_prom
   %sub = sub nsw i32 64, %c
author	Ganesh Gopalasubramanian <Ganesh.Gopalasubramanian@amd.com>	2019-02-26 16:55:10 +0000
committer	Ganesh Gopalasubramanian <Ganesh.Gopalasubramanian@amd.com>	2019-02-26 16:55:10 +0000
commit	e172d7008d0c12eaa9ee2aee16ee4e13c176553c (patch)
tree	8beb77f4f6a28e4e3c230c29202395f175c7a3ac /llvm
parent	c110b5b69f19700939a56d08218dfb0abb577af9 (diff)
download	bcm5719-llvm-e172d7008d0c12eaa9ee2aee16ee4e13c176553c.tar.gz bcm5719-llvm-e172d7008d0c12eaa9ee2aee16ee4e13c176553c.zip