[AVX-512] Add EVEX form of VMOVZPQILo2PQIZrm to load folding tables to match SSE and AVX.

llvm-svn: 287523
author: Craig Topper <craig.topper@gmail.com> 2016-11-21 07:51:31 +0000
committer: Craig Topper <craig.topper@gmail.com> 2016-11-21 07:51:31 +0000
commit: 9f2d632ee7b7d4b1695278c32cca8391403f51d4 (patch)
tree: 587b3f86989a545a24ea3cc1b01e2d28b0e154e5
parent: 2bb523f554904fda77c21e51c5f5c7bdec53651a (diff)
download: bcm5719-llvm-9f2d632ee7b7d4b1695278c32cca8391403f51d4.tar.gz
bcm5719-llvm-9f2d632ee7b7d4b1695278c32cca8391403f51d4.zip
2 files changed, 13 insertions, 2 deletions
diff --git a/llvm/lib/Target/X86/X86InstrInfo.cpp b/llvm/lib/Target/X86/X86InstrInfo.cpp
index cf12b3665a5..3c253a6ee0f 100644
--- a/llvm/lib/Target/X86/X86InstrInfo.cpp
+++ b/llvm/lib/Target/X86/X86InstrInfo.cpp
@@ -855,6 +855,7 @@ X86InstrInfo::X86InstrInfo(X86Subtarget &STI)
 
     // AVX-512 foldable instructions
     { X86::VMOV64toPQIZrr,   X86::VMOVQI2PQIZrm,      0 },
+    { X86::VMOVZPQILo2PQIZrr,X86::VMOVZPQILo2PQIZrm,  TB_ALIGN_16 },
     { X86::VMOVDI2SSZrr,     X86::VMOVDI2SSZrm,       0 },
     { X86::VMOVAPDZrr,       X86::VMOVAPDZrm,         TB_ALIGN_64 },
     { X86::VMOVAPSZrr,       X86::VMOVAPSZrm,         TB_ALIGN_64 },
diff --git a/llvm/test/CodeGen/X86/stack-folding-int-avx512.ll b/llvm/test/CodeGen/X86/stack-folding-int-avx512.ll
index ed66966bdef..58d88c597fe 100644
--- a/llvm/test/CodeGen/X86/stack-folding-int-avx512.ll
+++ b/llvm/test/CodeGen/X86/stack-folding-int-avx512.ll
@@ -377,7 +377,7 @@ define <4 x i64> @stack_fold_extracti64x4(<8 x i64> %a0, <8 x i64> %a1) {
 define <16 x i32> @stack_fold_inserti32x8(<8 x i32> %a0, <8 x i32> %a1) {
   ;CHECK-LABEL: stack_fold_inserti32x8
   ;CHECK:       vinserti32x8 $1, {{-?[0-9]*}}(%rsp), {{%zmm[0-9][0-9]*}}, {{%zmm[0-9][0-9]*}} {{.*#+}} 32-byte Folded Reload
-  %1 = tail call <2 x i64> asm sideeffect "nop", "=x,~{xmm2},~{xmm3},~{xmm4},~{xmm5},~{xmm6},~{xmm7},~{xmm8},~{xmm9},~{xmm10},~{xmm11},~{xmm12},~{xmm13},~{xmm14},~{xmm15},~{flags}"()
+  %1 = tail call <2 x i64> asm sideeffect "nop", "=x,~{xmm2},~{xmm3},~{xmm4},~{xmm5},~{xmm6},~{xmm7},~{xmm8},~{xmm9},~{xmm10},~{xmm11},~{xmm12},~{xmm13},~{xmm14},~{xmm15},~{xmm16},~{xmm17},~{xmm18},~{xmm19},~{xmm20},~{xmm21},~{xmm22},~{xmm23},~{xmm24},~{xmm25},~{xmm26},~{xmm27},~{xmm28},~{xmm29},~{xmm30},~{xmm31},~{flags}"()
   %2 = shufflevector <8 x i32> %a0, <8 x i32> %a1, <16 x i32> <i32 0, i32 1, i32 2, i32 3, i32 4, i32 5, i32 6, i32 7, i32 8, i32 9, i32 10, i32 11, i32 12, i32 13, i32 14, i32 15>
   ; add forces execution domain
   %3 = add <16 x i32> %2, <i32 1, i32 1, i32 1, i32 1, i32 1, i32 1, i32 1, i32 1, i32 1, i32 1, i32 1, i32 1, i32 1, i32 1, i32 1, i32 1>
@@ -387,9 +387,19 @@ define <16 x i32> @stack_fold_inserti32x8(<8 x i32> %a0, <8 x i32> %a1) {
 define <8 x i64> @stack_fold_inserti64x4(<4 x i64> %a0, <4 x i64> %a1) {
   ;CHECK-LABEL: stack_fold_inserti64x4
   ;CHECK:       vinserti64x4 $1, {{-?[0-9]*}}(%rsp), {{%zmm[0-9][0-9]*}}, {{%zmm[0-9][0-9]*}} {{.*#+}} 32-byte Folded Reload
-  %1 = tail call <2 x i64> asm sideeffect "nop", "=x,~{xmm2},~{xmm3},~{xmm4},~{xmm5},~{xmm6},~{xmm7},~{xmm8},~{xmm9},~{xmm10},~{xmm11},~{xmm12},~{xmm13},~{xmm14},~{xmm15},~{flags}"()
+  %1 = tail call <2 x i64> asm sideeffect "nop", "=x,~{xmm2},~{xmm3},~{xmm4},~{xmm5},~{xmm6},~{xmm7},~{xmm8},~{xmm9},~{xmm10},~{xmm11},~{xmm12},~{xmm13},~{xmm14},~{xmm15},~{xmm16},~{xmm17},~{xmm18},~{xmm19},~{xmm20},~{xmm21},~{xmm22},~{xmm23},~{xmm24},~{xmm25},~{xmm26},~{xmm27},~{xmm28},~{xmm29},~{xmm30},~{xmm31},~{flags}"()
   %2 = shufflevector <4 x i64> %a0, <4 x i64> %a1, <8 x i32> <i32 0, i32 1, i32 2, i32 3, i32 4, i32 5, i32 6, i32 7>
   ; add forces execution domain
   %3 = add <8 x i64> %2, <i64 1, i64 1, i64 1, i64 1, i64 1, i64 1, i64 1, i64 1>
   ret <8 x i64> %3
 }
+
+define <2 x i64> @stack_fold_movq_load(<2 x i64> %a0) {
+  ;CHECK-LABEL: stack_fold_movq_load
+  ;CHECK:       movq {{-?[0-9]*}}(%rsp), {{%xmm[0-9][0-9]*}} {{.*#+}} 16-byte Folded Reload
+  %1 = tail call <2 x i64> asm sideeffect "nop", "=x,~{xmm1},~{xmm2},~{xmm3},~{xmm4},~{xmm5},~{xmm6},~{xmm7},~{xmm8},~{xmm9},~{xmm10},~{xmm11},~{xmm12},~{xmm13},~{xmm14},~{xmm15},~{xmm16},~{xmm17},~{xmm18},~{xmm19},~{xmm20},~{xmm21},~{xmm22},~{xmm23},~{xmm24},~{xmm25},~{xmm26},~{xmm27},~{xmm28},~{xmm29},~{xmm30},~{xmm31},~{flags}"()
+  %2 = shufflevector <2 x i64> %a0, <2 x i64> zeroinitializer, <2 x i32> <i32 0, i32 2>
+  ; add forces execution domain
+  %3 = add <2 x i64> %2, <i64 1, i64 1>
+  ret <2 x i64> %3
+}
author	Craig Topper <craig.topper@gmail.com>	2016-11-21 07:51:31 +0000
committer	Craig Topper <craig.topper@gmail.com>	2016-11-21 07:51:31 +0000
commit	9f2d632ee7b7d4b1695278c32cca8391403f51d4 (patch)
tree	587b3f86989a545a24ea3cc1b01e2d28b0e154e5
parent	2bb523f554904fda77c21e51c5f5c7bdec53651a (diff)
download	bcm5719-llvm-9f2d632ee7b7d4b1695278c32cca8391403f51d4.tar.gz bcm5719-llvm-9f2d632ee7b7d4b1695278c32cca8391403f51d4.zip