| field | value | date |
|---|---|---|
| author | Craig Topper <craig.topper@intel.com> | 2017-10-14 05:55:43 +0000 |
| committer | Craig Topper <craig.topper@intel.com> | 2017-10-14 05:55:43 +0000 |
| commit | 61010a85b884d525fe4fb7be02b4ebebb9c86ff7 (patch) | |
| tree | 6e6b71ab6ea6d956d4ec0b090194ec1c9765ea05 | |
| parent | ee277e190c514228131a83b7bb80c8628295189c (diff) | |
| download | bcm5719-llvm-61010a85b884d525fe4fb7be02b4ebebb9c86ff7.tar.gz bcm5719-llvm-61010a85b884d525fe4fb7be02b4ebebb9c86ff7.zip | |
[X86] Add AVX512 versions of VCVTPD2PS to load folding tables.
llvm-svn: 315801
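Background for the change: the load folding tables map each register-register opcode to its register-memory twin, letting the register allocator fold the reload of a spilled operand directly into the consuming instruction instead of emitting a separate load. A rough before/after sketch of the codegen the new 512-bit entry enables (register choices and the stack offset are illustrative, not taken from the patch):

```asm
# Without the table entry: reload the spilled source, then convert.
vmovupd 64(%rsp), %zmm1        # reload spilled 8 x double from the spill slot
vcvtpd2ps %zmm1, %ymm0         # truncate to 8 x float

# With { X86::VCVTPD2PSZrr, X86::VCVTPD2PSZrm, 0 } in the table,
# the reload is folded into the conversion itself:
vcvtpd2ps 64(%rsp), %ymm0      # 64-byte Folded Reload
```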
| mode | path | lines changed |
|---|---|---|
| -rw-r--r-- | llvm/lib/Target/X86/X86InstrInfo.cpp | 3 |
| -rw-r--r-- | llvm/test/CodeGen/X86/stack-folding-fp-avx512.ll | 8 |
| -rw-r--r-- | llvm/test/CodeGen/X86/stack-folding-fp-avx512vl.ll | 16 |

3 files changed, 27 insertions, 0 deletions
```diff
diff --git a/llvm/lib/Target/X86/X86InstrInfo.cpp b/llvm/lib/Target/X86/X86InstrInfo.cpp
index b9f78d3ad43..ae4d123e4cf 100644
--- a/llvm/lib/Target/X86/X86InstrInfo.cpp
+++ b/llvm/lib/Target/X86/X86InstrInfo.cpp
@@ -930,6 +930,7 @@ X86InstrInfo::X86InstrInfo(X86Subtarget &STI)
     { X86::VBROADCASTSSZr,   X86::VBROADCASTSSZm,   TB_NO_REVERSE },
     { X86::VBROADCASTSDZr,   X86::VBROADCASTSDZm,   TB_NO_REVERSE },
     { X86::VCVTDQ2PDZrr,     X86::VCVTDQ2PDZrm,     0 },
+    { X86::VCVTPD2PSZrr,     X86::VCVTPD2PSZrm,     0 },
     { X86::VCVTUDQ2PDZrr,    X86::VCVTUDQ2PDZrm,    0 },
     { X86::VMOV64toPQIZrr,   X86::VMOVQI2PQIZrm,    0 },
     { X86::VMOV64toSDZrr,    X86::VMOV64toSDZrm,    0 },
@@ -991,6 +992,7 @@ X86InstrInfo::X86InstrInfo(X86Subtarget &STI)
     { X86::VBROADCASTSSZ256r,  X86::VBROADCASTSSZ256m,  TB_NO_REVERSE },
     { X86::VBROADCASTSDZ256r,  X86::VBROADCASTSDZ256m,  TB_NO_REVERSE },
     { X86::VCVTDQ2PDZ256rr,    X86::VCVTDQ2PDZ256rm,    0 },
+    { X86::VCVTPD2PSZ256rr,    X86::VCVTPD2PSZ256rm,    0 },
     { X86::VCVTUDQ2PDZ256rr,   X86::VCVTUDQ2PDZ256rm,   0 },
     { X86::VMOVAPDZ256rr,      X86::VMOVAPDZ256rm,      TB_ALIGN_32 },
     { X86::VMOVAPSZ256rr,      X86::VMOVAPSZ256rm,      TB_ALIGN_32 },
@@ -1044,6 +1046,7 @@ X86InstrInfo::X86InstrInfo(X86Subtarget &STI)
     // AVX-512 foldable instructions (128-bit versions)
     { X86::VBROADCASTSSZ128r,  X86::VBROADCASTSSZ128m,  TB_NO_REVERSE },
     { X86::VCVTDQ2PDZ128rr,    X86::VCVTDQ2PDZ128rm,    TB_NO_REVERSE },
+    { X86::VCVTPD2PSZ128rr,    X86::VCVTPD2PSZ128rm,    0 },
     { X86::VCVTUDQ2PDZ128rr,   X86::VCVTUDQ2PDZ128rm,   TB_NO_REVERSE },
     { X86::VMOVAPDZ128rr,      X86::VMOVAPDZ128rm,      TB_ALIGN_16 },
     { X86::VMOVAPSZ128rr,      X86::VMOVAPSZ128rm,      TB_ALIGN_16 },
diff --git a/llvm/test/CodeGen/X86/stack-folding-fp-avx512.ll b/llvm/test/CodeGen/X86/stack-folding-fp-avx512.ll
index e22e1a28e95..7bd46029f0e 100644
--- a/llvm/test/CodeGen/X86/stack-folding-fp-avx512.ll
+++ b/llvm/test/CodeGen/X86/stack-folding-fp-avx512.ll
@@ -200,6 +200,14 @@ define <8 x double> @stack_fold_cvtudq2pd(<8 x i32> %a0) {
   ret <8 x double> %2
 }
 
+define <8 x float> @stack_fold_cvtpd2ps(<8 x double> %a0) {
+  ;CHECK-LABEL: stack_fold_cvtpd2ps
+  ;CHECK: vcvtpd2ps {{-?[0-9]*}}(%rsp), {{%ymm[0-9][0-9]*}} {{.*#+}} 64-byte Folded Reload
+  %1 = tail call <2 x i64> asm sideeffect "nop", "=x,~{xmm1},~{xmm2},~{xmm3},~{xmm4},~{xmm5},~{xmm6},~{xmm7},~{xmm8},~{xmm9},~{xmm10},~{xmm11},~{xmm12},~{xmm13},~{xmm14},~{xmm15},~{xmm16},~{xmm17},~{xmm18},~{xmm19},~{xmm20},~{xmm21},~{xmm22},~{xmm23},~{xmm24},~{xmm25},~{xmm26},~{xmm27},~{xmm28},~{xmm29},~{xmm30},~{xmm31},~{flags}"()
+  %2 = fptrunc <8 x double> %a0 to <8 x float>
+  ret <8 x float> %2
+}
+
 define <4 x float> @stack_fold_insertps(<4 x float> %a0, <4 x float> %a1) {
   ;CHECK-LABEL: stack_fold_insertps
   ;CHECK: vinsertps $17, {{-?[0-9]*}}(%rsp), {{%xmm[0-9][0-9]*}}, {{%xmm[0-9][0-9]*}} {{.*#+}} 16-byte Folded Reload
diff --git a/llvm/test/CodeGen/X86/stack-folding-fp-avx512vl.ll b/llvm/test/CodeGen/X86/stack-folding-fp-avx512vl.ll
index 331af4b6799..717e942fff1 100644
--- a/llvm/test/CodeGen/X86/stack-folding-fp-avx512vl.ll
+++ b/llvm/test/CodeGen/X86/stack-folding-fp-avx512vl.ll
@@ -250,6 +250,22 @@ define <4 x double> @stack_fold_cvtudq2pd_ymm(<4 x i32> %a0) {
   ret <4 x double> %2
 }
 
+define <2 x float> @stack_fold_cvtpd2ps(<2 x double> %a0) {
+  ;CHECK-LABEL: stack_fold_cvtpd2ps
+  ;CHECK: vcvtpd2psx {{-?[0-9]*}}(%rsp), {{%xmm[0-9][0-9]*}} {{.*#+}} 16-byte Folded Reload
+  %1 = tail call <2 x i64> asm sideeffect "nop", "=x,~{xmm1},~{xmm2},~{xmm3},~{xmm4},~{xmm5},~{xmm6},~{xmm7},~{xmm8},~{xmm9},~{xmm10},~{xmm11},~{xmm12},~{xmm13},~{xmm14},~{xmm15},~{xmm16},~{xmm17},~{xmm18},~{xmm19},~{xmm20},~{xmm21},~{xmm22},~{xmm23},~{xmm24},~{xmm25},~{xmm26},~{xmm27},~{xmm28},~{xmm29},~{xmm30},~{xmm31},~{flags}"()
+  %2 = fptrunc <2 x double> %a0 to <2 x float>
+  ret <2 x float> %2
+}
+
+define <4 x float> @stack_fold_cvtpd2ps_ymm(<4 x double> %a0) {
+  ;CHECK-LABEL: stack_fold_cvtpd2ps_ymm
+  ;CHECK: vcvtpd2psy {{-?[0-9]*}}(%rsp), {{%xmm[0-9][0-9]*}} {{.*#+}} 32-byte Folded Reload
+  %1 = tail call <2 x i64> asm sideeffect "nop", "=x,~{xmm1},~{xmm2},~{xmm3},~{xmm4},~{xmm5},~{xmm6},~{xmm7},~{xmm8},~{xmm9},~{xmm10},~{xmm11},~{xmm12},~{xmm13},~{xmm14},~{xmm15},~{xmm16},~{xmm17},~{xmm18},~{xmm19},~{xmm20},~{xmm21},~{xmm22},~{xmm23},~{xmm24},~{xmm25},~{xmm26},~{xmm27},~{xmm28},~{xmm29},~{xmm30},~{xmm31},~{flags}"()
+  %2 = fptrunc <4 x double> %a0 to <4 x float>
+  ret <4 x float> %2
+}
+
 define <2 x double> @stack_fold_maxpd(<2 x double> %a0, <2 x double> %a1) #0 {
   ;CHECK-LABEL: stack_fold_maxpd
   ;CHECK: vmaxpd {{-?[0-9]*}}(%rsp), {{%xmm[0-9][0-9]*}}, {{%xmm[0-9][0-9]*}} {{.*#+}} 16-byte Folded Reload
```

