summaryrefslogtreecommitdiffstats
path: root/llvm/test/CodeGen/X86/avx512-schedule.ll
diff options
context:
space:
mode:
Diffstat (limited to 'llvm/test/CodeGen/X86/avx512-schedule.ll')
-rwxr-xr-xllvm/test/CodeGen/X86/avx512-schedule.ll122
1 files changed, 72 insertions, 50 deletions
diff --git a/llvm/test/CodeGen/X86/avx512-schedule.ll b/llvm/test/CodeGen/X86/avx512-schedule.ll
index 7a2aa56f7cc..096cbc7d459 100755
--- a/llvm/test/CodeGen/X86/avx512-schedule.ll
+++ b/llvm/test/CodeGen/X86/avx512-schedule.ll
@@ -2787,15 +2787,17 @@ define <16 x float> @usto16f32(<16 x i16> %a) {
define <16 x float> @ubto16f32(<16 x i32> %a) {
; GENERIC-LABEL: ubto16f32:
; GENERIC: # %bb.0:
-; GENERIC-NEXT: vpmovd2m %zmm0, %k1 # sched: [1:0.33]
-; GENERIC-NEXT: vpbroadcastd {{.*}}(%rip), %zmm0 {%k1} {z} # sched: [5:1.00]
+; GENERIC-NEXT: vpmovd2m %zmm0, %k0 # sched: [1:0.33]
+; GENERIC-NEXT: vpmovm2d %k0, %zmm0 # sched: [1:0.33]
+; GENERIC-NEXT: vpsrld $31, %zmm0, %zmm0 # sched: [3:1.00]
; GENERIC-NEXT: vcvtdq2ps %zmm0, %zmm0 # sched: [4:1.00]
; GENERIC-NEXT: retq # sched: [1:1.00]
;
; SKX-LABEL: ubto16f32:
; SKX: # %bb.0:
-; SKX-NEXT: vpmovd2m %zmm0, %k1 # sched: [1:1.00]
-; SKX-NEXT: vpbroadcastd {{.*}}(%rip), %zmm0 {%k1} {z} # sched: [8:0.50]
+; SKX-NEXT: vpmovd2m %zmm0, %k0 # sched: [1:1.00]
+; SKX-NEXT: vpmovm2d %k0, %zmm0 # sched: [1:0.25]
+; SKX-NEXT: vpsrld $31, %zmm0, %zmm0 # sched: [1:0.50]
; SKX-NEXT: vcvtdq2ps %zmm0, %zmm0 # sched: [4:0.33]
; SKX-NEXT: retq # sched: [7:1.00]
%mask = icmp slt <16 x i32> %a, zeroinitializer
@@ -2806,23 +2808,25 @@ define <16 x float> @ubto16f32(<16 x i32> %a) {
define <16 x double> @ubto16f64(<16 x i32> %a) {
; GENERIC-LABEL: ubto16f64:
; GENERIC: # %bb.0:
-; GENERIC-NEXT: vpmovd2m %zmm0, %k1 # sched: [1:0.33]
-; GENERIC-NEXT: movl {{.*}}(%rip), %eax # sched: [5:0.50]
-; GENERIC-NEXT: vpbroadcastd %eax, %ymm0 {%k1} {z} # sched: [1:1.00]
+; GENERIC-NEXT: vpmovd2m %zmm0, %k0 # sched: [1:0.33]
+; GENERIC-NEXT: vpmovm2d %k0, %ymm0 # sched: [1:0.33]
+; GENERIC-NEXT: vpsrld $31, %ymm0, %ymm0 # sched: [1:1.00]
; GENERIC-NEXT: vcvtdq2pd %ymm0, %zmm0 # sched: [4:1.00]
-; GENERIC-NEXT: kshiftrw $8, %k1, %k1 # sched: [1:1.00]
-; GENERIC-NEXT: vpbroadcastd %eax, %ymm1 {%k1} {z} # sched: [1:1.00]
+; GENERIC-NEXT: kshiftrw $8, %k0, %k0 # sched: [1:1.00]
+; GENERIC-NEXT: vpmovm2d %k0, %ymm1 # sched: [1:0.33]
+; GENERIC-NEXT: vpsrld $31, %ymm1, %ymm1 # sched: [1:1.00]
; GENERIC-NEXT: vcvtdq2pd %ymm1, %zmm1 # sched: [4:1.00]
; GENERIC-NEXT: retq # sched: [1:1.00]
;
; SKX-LABEL: ubto16f64:
; SKX: # %bb.0:
-; SKX-NEXT: vpmovd2m %zmm0, %k1 # sched: [1:1.00]
-; SKX-NEXT: movl {{.*}}(%rip), %eax # sched: [5:0.50]
-; SKX-NEXT: vpbroadcastd %eax, %ymm0 {%k1} {z} # sched: [3:1.00]
+; SKX-NEXT: vpmovd2m %zmm0, %k0 # sched: [1:1.00]
+; SKX-NEXT: vpmovm2d %k0, %ymm0 # sched: [1:0.25]
+; SKX-NEXT: vpsrld $31, %ymm0, %ymm0 # sched: [1:0.50]
; SKX-NEXT: vcvtdq2pd %ymm0, %zmm0 # sched: [7:1.00]
-; SKX-NEXT: kshiftrw $8, %k1, %k1 # sched: [3:1.00]
-; SKX-NEXT: vpbroadcastd %eax, %ymm1 {%k1} {z} # sched: [3:1.00]
+; SKX-NEXT: kshiftrw $8, %k0, %k0 # sched: [3:1.00]
+; SKX-NEXT: vpmovm2d %k0, %ymm1 # sched: [1:0.25]
+; SKX-NEXT: vpsrld $31, %ymm1, %ymm1 # sched: [1:0.50]
; SKX-NEXT: vcvtdq2pd %ymm1, %zmm1 # sched: [7:1.00]
; SKX-NEXT: retq # sched: [7:1.00]
%mask = icmp slt <16 x i32> %a, zeroinitializer
@@ -2833,15 +2837,17 @@ define <16 x double> @ubto16f64(<16 x i32> %a) {
define <8 x float> @ubto8f32(<8 x i32> %a) {
; GENERIC-LABEL: ubto8f32:
; GENERIC: # %bb.0:
-; GENERIC-NEXT: vpmovd2m %ymm0, %k1 # sched: [1:0.33]
-; GENERIC-NEXT: vpbroadcastd {{.*}}(%rip), %ymm0 {%k1} {z} # sched: [5:1.00]
+; GENERIC-NEXT: vpmovd2m %ymm0, %k0 # sched: [1:0.33]
+; GENERIC-NEXT: vpmovm2d %k0, %ymm0 # sched: [1:0.33]
+; GENERIC-NEXT: vpsrld $31, %ymm0, %ymm0 # sched: [1:1.00]
; GENERIC-NEXT: vcvtdq2ps %ymm0, %ymm0 # sched: [3:1.00]
; GENERIC-NEXT: retq # sched: [1:1.00]
;
; SKX-LABEL: ubto8f32:
; SKX: # %bb.0:
-; SKX-NEXT: vpmovd2m %ymm0, %k1 # sched: [1:1.00]
-; SKX-NEXT: vpbroadcastd {{.*}}(%rip), %ymm0 {%k1} {z} # sched: [8:0.50]
+; SKX-NEXT: vpmovd2m %ymm0, %k0 # sched: [1:1.00]
+; SKX-NEXT: vpmovm2d %k0, %ymm0 # sched: [1:0.25]
+; SKX-NEXT: vpsrld $31, %ymm0, %ymm0 # sched: [1:0.50]
; SKX-NEXT: vcvtdq2ps %ymm0, %ymm0 # sched: [4:0.33]
; SKX-NEXT: retq # sched: [7:1.00]
%mask = icmp slt <8 x i32> %a, zeroinitializer
@@ -2852,15 +2858,17 @@ define <8 x float> @ubto8f32(<8 x i32> %a) {
define <8 x double> @ubto8f64(<8 x i32> %a) {
; GENERIC-LABEL: ubto8f64:
; GENERIC: # %bb.0:
-; GENERIC-NEXT: vpmovd2m %ymm0, %k1 # sched: [1:0.33]
-; GENERIC-NEXT: vpbroadcastd {{.*}}(%rip), %ymm0 {%k1} {z} # sched: [5:1.00]
+; GENERIC-NEXT: vpmovd2m %ymm0, %k0 # sched: [1:0.33]
+; GENERIC-NEXT: vpmovm2d %k0, %ymm0 # sched: [1:0.33]
+; GENERIC-NEXT: vpsrld $31, %ymm0, %ymm0 # sched: [1:1.00]
; GENERIC-NEXT: vcvtdq2pd %ymm0, %zmm0 # sched: [4:1.00]
; GENERIC-NEXT: retq # sched: [1:1.00]
;
; SKX-LABEL: ubto8f64:
; SKX: # %bb.0:
-; SKX-NEXT: vpmovd2m %ymm0, %k1 # sched: [1:1.00]
-; SKX-NEXT: vpbroadcastd {{.*}}(%rip), %ymm0 {%k1} {z} # sched: [8:0.50]
+; SKX-NEXT: vpmovd2m %ymm0, %k0 # sched: [1:1.00]
+; SKX-NEXT: vpmovm2d %k0, %ymm0 # sched: [1:0.25]
+; SKX-NEXT: vpsrld $31, %ymm0, %ymm0 # sched: [1:0.50]
; SKX-NEXT: vcvtdq2pd %ymm0, %zmm0 # sched: [7:1.00]
; SKX-NEXT: retq # sched: [7:1.00]
%mask = icmp slt <8 x i32> %a, zeroinitializer
@@ -2871,15 +2879,17 @@ define <8 x double> @ubto8f64(<8 x i32> %a) {
define <4 x float> @ubto4f32(<4 x i32> %a) {
; GENERIC-LABEL: ubto4f32:
; GENERIC: # %bb.0:
-; GENERIC-NEXT: vpmovd2m %xmm0, %k1 # sched: [1:0.33]
-; GENERIC-NEXT: vpbroadcastd {{.*}}(%rip), %xmm0 {%k1} {z} # sched: [5:1.00]
+; GENERIC-NEXT: vpmovd2m %xmm0, %k0 # sched: [1:0.33]
+; GENERIC-NEXT: vpmovm2d %k0, %xmm0 # sched: [1:0.33]
+; GENERIC-NEXT: vpsrld $31, %xmm0, %xmm0 # sched: [1:1.00]
; GENERIC-NEXT: vcvtdq2ps %xmm0, %xmm0 # sched: [3:1.00]
; GENERIC-NEXT: retq # sched: [1:1.00]
;
; SKX-LABEL: ubto4f32:
; SKX: # %bb.0:
-; SKX-NEXT: vpmovd2m %xmm0, %k1 # sched: [1:1.00]
-; SKX-NEXT: vpbroadcastd {{.*}}(%rip), %xmm0 {%k1} {z} # sched: [7:0.50]
+; SKX-NEXT: vpmovd2m %xmm0, %k0 # sched: [1:1.00]
+; SKX-NEXT: vpmovm2d %k0, %xmm0 # sched: [1:0.25]
+; SKX-NEXT: vpsrld $31, %xmm0, %xmm0 # sched: [1:0.50]
; SKX-NEXT: vcvtdq2ps %xmm0, %xmm0 # sched: [4:0.33]
; SKX-NEXT: retq # sched: [7:1.00]
%mask = icmp slt <4 x i32> %a, zeroinitializer
@@ -2890,15 +2900,17 @@ define <4 x float> @ubto4f32(<4 x i32> %a) {
define <4 x double> @ubto4f64(<4 x i32> %a) {
; GENERIC-LABEL: ubto4f64:
; GENERIC: # %bb.0:
-; GENERIC-NEXT: vpmovd2m %xmm0, %k1 # sched: [1:0.33]
-; GENERIC-NEXT: vpbroadcastd {{.*}}(%rip), %xmm0 {%k1} {z} # sched: [5:1.00]
+; GENERIC-NEXT: vpmovd2m %xmm0, %k0 # sched: [1:0.33]
+; GENERIC-NEXT: vpmovm2d %k0, %xmm0 # sched: [1:0.33]
+; GENERIC-NEXT: vpsrld $31, %xmm0, %xmm0 # sched: [1:1.00]
; GENERIC-NEXT: vcvtdq2pd %xmm0, %ymm0 # sched: [4:1.00]
; GENERIC-NEXT: retq # sched: [1:1.00]
;
; SKX-LABEL: ubto4f64:
; SKX: # %bb.0:
-; SKX-NEXT: vpmovd2m %xmm0, %k1 # sched: [1:1.00]
-; SKX-NEXT: vpbroadcastd {{.*}}(%rip), %xmm0 {%k1} {z} # sched: [7:0.50]
+; SKX-NEXT: vpmovd2m %xmm0, %k0 # sched: [1:1.00]
+; SKX-NEXT: vpmovm2d %k0, %xmm0 # sched: [1:0.25]
+; SKX-NEXT: vpsrld $31, %xmm0, %xmm0 # sched: [1:0.50]
; SKX-NEXT: vcvtdq2pd %xmm0, %ymm0 # sched: [7:1.00]
; SKX-NEXT: retq # sched: [7:1.00]
%mask = icmp slt <4 x i32> %a, zeroinitializer
@@ -2911,8 +2923,9 @@ define <2 x float> @ubto2f32(<2 x i32> %a) {
; GENERIC: # %bb.0:
; GENERIC-NEXT: vpxor %xmm1, %xmm1, %xmm1 # sched: [1:0.33]
; GENERIC-NEXT: vpblendd {{.*#+}} xmm0 = xmm0[0],xmm1[1],xmm0[2],xmm1[3] sched: [1:0.50]
-; GENERIC-NEXT: vptestmq %xmm0, %xmm0, %k1 # sched: [1:1.00]
-; GENERIC-NEXT: vpbroadcastd {{.*}}(%rip), %xmm0 {%k1} {z} # sched: [5:1.00]
+; GENERIC-NEXT: vptestmq %xmm0, %xmm0, %k0 # sched: [1:1.00]
+; GENERIC-NEXT: vpmovm2d %k0, %xmm0 # sched: [1:0.33]
+; GENERIC-NEXT: vpsrld $31, %xmm0, %xmm0 # sched: [1:1.00]
; GENERIC-NEXT: vcvtdq2ps %xmm0, %xmm0 # sched: [3:1.00]
; GENERIC-NEXT: retq # sched: [1:1.00]
;
@@ -2920,8 +2933,9 @@ define <2 x float> @ubto2f32(<2 x i32> %a) {
; SKX: # %bb.0:
; SKX-NEXT: vpxor %xmm1, %xmm1, %xmm1 # sched: [1:0.33]
; SKX-NEXT: vpblendd {{.*#+}} xmm0 = xmm0[0],xmm1[1],xmm0[2],xmm1[3] sched: [1:0.33]
-; SKX-NEXT: vptestmq %xmm0, %xmm0, %k1 # sched: [3:1.00]
-; SKX-NEXT: vpbroadcastd {{.*}}(%rip), %xmm0 {%k1} {z} # sched: [7:0.50]
+; SKX-NEXT: vptestmq %xmm0, %xmm0, %k0 # sched: [3:1.00]
+; SKX-NEXT: vpmovm2d %k0, %xmm0 # sched: [1:0.25]
+; SKX-NEXT: vpsrld $31, %xmm0, %xmm0 # sched: [1:0.50]
; SKX-NEXT: vcvtdq2ps %xmm0, %xmm0 # sched: [4:0.33]
; SKX-NEXT: retq # sched: [7:1.00]
%mask = icmp ne <2 x i32> %a, zeroinitializer
@@ -2934,8 +2948,9 @@ define <2 x double> @ubto2f64(<2 x i32> %a) {
; GENERIC: # %bb.0:
; GENERIC-NEXT: vpxor %xmm1, %xmm1, %xmm1 # sched: [1:0.33]
; GENERIC-NEXT: vpblendd {{.*#+}} xmm0 = xmm0[0],xmm1[1],xmm0[2],xmm1[3] sched: [1:0.50]
-; GENERIC-NEXT: vptestmq %xmm0, %xmm0, %k1 # sched: [1:1.00]
-; GENERIC-NEXT: vpbroadcastd {{.*}}(%rip), %xmm0 {%k1} {z} # sched: [5:1.00]
+; GENERIC-NEXT: vptestmq %xmm0, %xmm0, %k0 # sched: [1:1.00]
+; GENERIC-NEXT: vpmovm2d %k0, %xmm0 # sched: [1:0.33]
+; GENERIC-NEXT: vpsrld $31, %xmm0, %xmm0 # sched: [1:1.00]
; GENERIC-NEXT: vcvtudq2pd %xmm0, %xmm0 # sched: [4:1.00]
; GENERIC-NEXT: retq # sched: [1:1.00]
;
@@ -2943,8 +2958,9 @@ define <2 x double> @ubto2f64(<2 x i32> %a) {
; SKX: # %bb.0:
; SKX-NEXT: vpxor %xmm1, %xmm1, %xmm1 # sched: [1:0.33]
; SKX-NEXT: vpblendd {{.*#+}} xmm0 = xmm0[0],xmm1[1],xmm0[2],xmm1[3] sched: [1:0.33]
-; SKX-NEXT: vptestmq %xmm0, %xmm0, %k1 # sched: [3:1.00]
-; SKX-NEXT: vpbroadcastd {{.*}}(%rip), %xmm0 {%k1} {z} # sched: [7:0.50]
+; SKX-NEXT: vptestmq %xmm0, %xmm0, %k0 # sched: [3:1.00]
+; SKX-NEXT: vpmovm2d %k0, %xmm0 # sched: [1:0.25]
+; SKX-NEXT: vpsrld $31, %xmm0, %xmm0 # sched: [1:0.50]
; SKX-NEXT: vcvtudq2pd %xmm0, %xmm0 # sched: [5:1.00]
; SKX-NEXT: retq # sched: [7:1.00]
%mask = icmp ne <2 x i32> %a, zeroinitializer
@@ -4244,14 +4260,16 @@ define <8 x double> @fpext_test(<8 x float> %a) nounwind readnone {
define <16 x i32> @zext_16i1_to_16xi32(i16 %b) {
; GENERIC-LABEL: zext_16i1_to_16xi32:
; GENERIC: # %bb.0:
-; GENERIC-NEXT: kmovd %edi, %k1 # sched: [1:0.33]
-; GENERIC-NEXT: vpbroadcastd {{.*}}(%rip), %zmm0 {%k1} {z} # sched: [5:1.00]
+; GENERIC-NEXT: kmovd %edi, %k0 # sched: [1:0.33]
+; GENERIC-NEXT: vpmovm2d %k0, %zmm0 # sched: [1:0.33]
+; GENERIC-NEXT: vpsrld $31, %zmm0, %zmm0 # sched: [3:1.00]
; GENERIC-NEXT: retq # sched: [1:1.00]
;
; SKX-LABEL: zext_16i1_to_16xi32:
; SKX: # %bb.0:
-; SKX-NEXT: kmovd %edi, %k1 # sched: [1:1.00]
-; SKX-NEXT: vpbroadcastd {{.*}}(%rip), %zmm0 {%k1} {z} # sched: [8:0.50]
+; SKX-NEXT: kmovd %edi, %k0 # sched: [1:1.00]
+; SKX-NEXT: vpmovm2d %k0, %zmm0 # sched: [1:0.25]
+; SKX-NEXT: vpsrld $31, %zmm0, %zmm0 # sched: [1:0.50]
; SKX-NEXT: retq # sched: [7:1.00]
%a = bitcast i16 %b to <16 x i1>
%c = zext <16 x i1> %a to <16 x i32>
@@ -4261,14 +4279,16 @@ define <16 x i32> @zext_16i1_to_16xi32(i16 %b) {
define <8 x i64> @zext_8i1_to_8xi64(i8 %b) {
; GENERIC-LABEL: zext_8i1_to_8xi64:
; GENERIC: # %bb.0:
-; GENERIC-NEXT: kmovd %edi, %k1 # sched: [1:0.33]
-; GENERIC-NEXT: vpbroadcastq {{.*}}(%rip), %zmm0 {%k1} {z} # sched: [5:1.00]
+; GENERIC-NEXT: kmovd %edi, %k0 # sched: [1:0.33]
+; GENERIC-NEXT: vpmovm2q %k0, %zmm0 # sched: [1:0.33]
+; GENERIC-NEXT: vpsrlq $63, %zmm0, %zmm0 # sched: [3:1.00]
; GENERIC-NEXT: retq # sched: [1:1.00]
;
; SKX-LABEL: zext_8i1_to_8xi64:
; SKX: # %bb.0:
-; SKX-NEXT: kmovd %edi, %k1 # sched: [1:1.00]
-; SKX-NEXT: vpbroadcastq {{.*}}(%rip), %zmm0 {%k1} {z} # sched: [8:0.50]
+; SKX-NEXT: kmovd %edi, %k0 # sched: [1:1.00]
+; SKX-NEXT: vpmovm2q %k0, %zmm0 # sched: [1:0.25]
+; SKX-NEXT: vpsrlq $63, %zmm0, %zmm0 # sched: [1:0.50]
; SKX-NEXT: retq # sched: [7:1.00]
%a = bitcast i8 %b to <8 x i1>
%c = zext <8 x i1> %a to <8 x i64>
@@ -4653,14 +4673,16 @@ define <64 x i8> @zext_64xi1_to_64xi8(<64 x i8> %x, <64 x i8> %y) #0 {
define <32 x i16> @zext_32xi1_to_32xi16(<32 x i16> %x, <32 x i16> %y) #0 {
; GENERIC-LABEL: zext_32xi1_to_32xi16:
; GENERIC: # %bb.0:
-; GENERIC-NEXT: vpcmpeqw %zmm1, %zmm0, %k1 # sched: [3:1.00]
-; GENERIC-NEXT: vmovdqu16 {{.*}}(%rip), %zmm0 {%k1} {z} # sched: [4:0.50]
+; GENERIC-NEXT: vpcmpeqw %zmm1, %zmm0, %k0 # sched: [3:1.00]
+; GENERIC-NEXT: vpmovm2w %k0, %zmm0 # sched: [1:0.33]
+; GENERIC-NEXT: vpsrlw $15, %zmm0, %zmm0 # sched: [3:1.00]
; GENERIC-NEXT: retq # sched: [1:1.00]
;
; SKX-LABEL: zext_32xi1_to_32xi16:
; SKX: # %bb.0:
-; SKX-NEXT: vpcmpeqw %zmm1, %zmm0, %k1 # sched: [3:1.00]
-; SKX-NEXT: vmovdqu16 {{.*}}(%rip), %zmm0 {%k1} {z} # sched: [8:0.50]
+; SKX-NEXT: vpcmpeqw %zmm1, %zmm0, %k0 # sched: [3:1.00]
+; SKX-NEXT: vpmovm2w %k0, %zmm0 # sched: [1:0.25]
+; SKX-NEXT: vpsrlw $15, %zmm0, %zmm0 # sched: [1:0.50]
; SKX-NEXT: retq # sched: [7:1.00]
%mask = icmp eq <32 x i16> %x, %y
%1 = zext <32 x i1> %mask to <32 x i16>
OpenPOWER on IntegriCloud