summaryrefslogtreecommitdiffstats
path: root/llvm/test
diff options
context:
space:
mode:
authorCraig Topper <craig.topper@intel.com>2017-12-24 06:51:36 +0000
committerCraig Topper <craig.topper@intel.com>2017-12-24 06:51:36 +0000
commit2d1d9a11c1dbea2010c5d49d4e7b9224e26c5a2a (patch)
treeb2ee803ce11d582a0ba5664c8e6dd4382434a860 /llvm/test
parent64edcdc3fbb6ce07e40704bc17d2dae0c3529258 (diff)
downloadbcm5719-llvm-2d1d9a11c1dbea2010c5d49d4e7b9224e26c5a2a.tar.gz
bcm5719-llvm-2d1d9a11c1dbea2010c5d49d4e7b9224e26c5a2a.zip
[X86] Fix (v2f64 (s/uint_to_fp (v2i1))) to avoid scalarization without AVX512DQ.
Previously we extended v2i1 to v2i64 and then tried to use cvtuqq2pd/cvtqq2pd, but that only works with avx512dq. So we ended up scalarizing it. Now we widen to v4i1 first and extend to v4i32. llvm-svn: 321420
Diffstat (limited to 'llvm/test')
-rw-r--r-- llvm/test/CodeGen/X86/avx512-cvt.ll 48
-rwxr-xr-x llvm/test/CodeGen/X86/avx512-schedule.ll 16
2 files changed, 26 insertions, 38 deletions
diff --git a/llvm/test/CodeGen/X86/avx512-cvt.ll b/llvm/test/CodeGen/X86/avx512-cvt.ll
index e88ec9d7b15..18e9f306bc1 100644
--- a/llvm/test/CodeGen/X86/avx512-cvt.ll
+++ b/llvm/test/CodeGen/X86/avx512-cvt.ll
@@ -1691,8 +1691,8 @@ define <2 x double> @sbto2f64(<2 x double> %a) {
; VLDQ: # %bb.0:
; VLDQ-NEXT: vxorpd %xmm1, %xmm1, %xmm1
; VLDQ-NEXT: vcmpltpd %xmm0, %xmm1, %k0
-; VLDQ-NEXT: vpmovm2q %k0, %xmm0
-; VLDQ-NEXT: vcvtqq2pd %xmm0, %xmm0
+; VLDQ-NEXT: vpmovm2d %k0, %xmm0
+; VLDQ-NEXT: vcvtdq2pd %xmm0, %xmm0
; VLDQ-NEXT: retq
;
; VLNODQ-LABEL: sbto2f64:
@@ -1700,12 +1700,8 @@ define <2 x double> @sbto2f64(<2 x double> %a) {
; VLNODQ-NEXT: vpxor %xmm1, %xmm1, %xmm1
; VLNODQ-NEXT: vcmpltpd %xmm0, %xmm1, %k1
; VLNODQ-NEXT: vpcmpeqd %xmm0, %xmm0, %xmm0
-; VLNODQ-NEXT: vmovdqa64 %xmm0, %xmm0 {%k1} {z}
-; VLNODQ-NEXT: vpextrq $1, %xmm0, %rax
-; VLNODQ-NEXT: vcvtsi2sdl %eax, %xmm2, %xmm1
-; VLNODQ-NEXT: vmovq %xmm0, %rax
-; VLNODQ-NEXT: vcvtsi2sdl %eax, %xmm2, %xmm0
-; VLNODQ-NEXT: vmovlhps {{.*#+}} xmm0 = xmm0[0],xmm1[0]
+; VLNODQ-NEXT: vmovdqa32 %xmm0, %xmm0 {%k1} {z}
+; VLNODQ-NEXT: vcvtdq2pd %xmm0, %xmm0
; VLNODQ-NEXT: retq
%cmpres = fcmp ogt <2 x double> %a, zeroinitializer
%1 = sitofp <2 x i1> %cmpres to <2 x double>
@@ -2002,30 +1998,22 @@ define <2 x double> @ubto2f64(<2 x i32> %a) {
; NOVL-NEXT: vpxor %xmm1, %xmm1, %xmm1
; NOVL-NEXT: vpblendd {{.*#+}} xmm0 = xmm0[0],xmm1[1],xmm0[2],xmm1[3]
; NOVL-NEXT: vpcmpgtq %xmm0, %xmm1, %xmm0
-; NOVL-NEXT: vpand {{.*}}(%rip), %xmm0, %xmm0
+; NOVL-NEXT: vpshufd {{.*#+}} xmm0 = xmm0[0,2,2,3]
+; NOVL-NEXT: vpbroadcastd {{.*#+}} xmm1 = [1,1,1,1]
+; NOVL-NEXT: vpand %xmm1, %xmm0, %xmm0
+; NOVL-NEXT: vcvtudq2pd %ymm0, %zmm0
+; NOVL-NEXT: # kill: def %xmm0 killed %xmm0 killed %zmm0
+; NOVL-NEXT: vzeroupper
; NOVL-NEXT: retq
;
-; VLDQ-LABEL: ubto2f64:
-; VLDQ: # %bb.0:
-; VLDQ-NEXT: vpxor %xmm1, %xmm1, %xmm1
-; VLDQ-NEXT: vpblendd {{.*#+}} xmm0 = xmm0[0],xmm1[1],xmm0[2],xmm1[3]
-; VLDQ-NEXT: vpcmpltuq %xmm1, %xmm0, %k1
-; VLDQ-NEXT: vmovdqa64 {{.*}}(%rip), %xmm0 {%k1} {z}
-; VLDQ-NEXT: vcvtqq2pd %xmm0, %xmm0
-; VLDQ-NEXT: retq
-;
-; VLNODQ-LABEL: ubto2f64:
-; VLNODQ: # %bb.0:
-; VLNODQ-NEXT: vpxor %xmm1, %xmm1, %xmm1
-; VLNODQ-NEXT: vpblendd {{.*#+}} xmm0 = xmm0[0],xmm1[1],xmm0[2],xmm1[3]
-; VLNODQ-NEXT: vpcmpltuq %xmm1, %xmm0, %k1
-; VLNODQ-NEXT: vmovdqa64 {{.*}}(%rip), %xmm0 {%k1} {z}
-; VLNODQ-NEXT: vpextrq $1, %xmm0, %rax
-; VLNODQ-NEXT: vcvtsi2sdl %eax, %xmm2, %xmm1
-; VLNODQ-NEXT: vmovq %xmm0, %rax
-; VLNODQ-NEXT: vcvtsi2sdl %eax, %xmm2, %xmm0
-; VLNODQ-NEXT: vmovlhps {{.*#+}} xmm0 = xmm0[0],xmm1[0]
-; VLNODQ-NEXT: retq
+; VL-LABEL: ubto2f64:
+; VL: # %bb.0:
+; VL-NEXT: vpxor %xmm1, %xmm1, %xmm1
+; VL-NEXT: vpblendd {{.*#+}} xmm0 = xmm0[0],xmm1[1],xmm0[2],xmm1[3]
+; VL-NEXT: vpcmpltuq %xmm1, %xmm0, %k1
+; VL-NEXT: vpbroadcastd {{.*}}(%rip), %xmm0 {%k1} {z}
+; VL-NEXT: vcvtudq2pd %xmm0, %xmm0
+; VL-NEXT: retq
%mask = icmp ult <2 x i32> %a, zeroinitializer
%1 = uitofp <2 x i1> %mask to <2 x double>
ret <2 x double> %1
diff --git a/llvm/test/CodeGen/X86/avx512-schedule.ll b/llvm/test/CodeGen/X86/avx512-schedule.ll
index 78111874b58..306b95f0f3a 100755
--- a/llvm/test/CodeGen/X86/avx512-schedule.ll
+++ b/llvm/test/CodeGen/X86/avx512-schedule.ll
@@ -2602,16 +2602,16 @@ define <2 x double> @sbto2f64(<2 x double> %a) {
; GENERIC: # %bb.0:
; GENERIC-NEXT: vxorpd %xmm1, %xmm1, %xmm1 # sched: [1:1.00]
; GENERIC-NEXT: vcmpltpd %xmm0, %xmm1, %k0 # sched: [3:1.00]
-; GENERIC-NEXT: vpmovm2q %k0, %xmm0 # sched: [1:0.33]
-; GENERIC-NEXT: vcvtqq2pd %xmm0, %xmm0 # sched: [4:1.00]
+; GENERIC-NEXT: vpmovm2d %k0, %xmm0 # sched: [1:0.33]
+; GENERIC-NEXT: vcvtdq2pd %xmm0, %xmm0 # sched: [4:1.00]
; GENERIC-NEXT: retq # sched: [1:1.00]
;
; SKX-LABEL: sbto2f64:
; SKX: # %bb.0:
; SKX-NEXT: vxorpd %xmm1, %xmm1, %xmm1 # sched: [1:0.33]
; SKX-NEXT: vcmpltpd %xmm0, %xmm1, %k0 # sched: [3:1.00]
-; SKX-NEXT: vpmovm2q %k0, %xmm0 # sched: [1:0.25]
-; SKX-NEXT: vcvtqq2pd %xmm0, %xmm0 # sched: [4:0.33]
+; SKX-NEXT: vpmovm2d %k0, %xmm0 # sched: [1:0.25]
+; SKX-NEXT: vcvtdq2pd %xmm0, %xmm0 # sched: [5:1.00]
; SKX-NEXT: retq # sched: [7:1.00]
%cmpres = fcmp ogt <2 x double> %a, zeroinitializer
%1 = sitofp <2 x i1> %cmpres to <2 x double>
@@ -2989,8 +2989,8 @@ define <2 x double> @ubto2f64(<2 x i32> %a) {
; GENERIC-NEXT: vpxor %xmm1, %xmm1, %xmm1 # sched: [1:0.33]
; GENERIC-NEXT: vpblendd {{.*#+}} xmm0 = xmm0[0],xmm1[1],xmm0[2],xmm1[3] sched: [1:0.50]
; GENERIC-NEXT: vpcmpltuq %xmm1, %xmm0, %k1 # sched: [3:1.00]
-; GENERIC-NEXT: vmovdqa64 {{.*}}(%rip), %xmm0 {%k1} {z} # sched: [4:0.50]
-; GENERIC-NEXT: vcvtqq2pd %xmm0, %xmm0 # sched: [4:1.00]
+; GENERIC-NEXT: vpbroadcastd {{.*}}(%rip), %xmm0 {%k1} {z} # sched: [5:1.00]
+; GENERIC-NEXT: vcvtudq2pd %xmm0, %xmm0 # sched: [4:1.00]
; GENERIC-NEXT: retq # sched: [1:1.00]
;
; SKX-LABEL: ubto2f64:
@@ -2998,8 +2998,8 @@ define <2 x double> @ubto2f64(<2 x i32> %a) {
; SKX-NEXT: vpxor %xmm1, %xmm1, %xmm1 # sched: [1:0.33]
; SKX-NEXT: vpblendd {{.*#+}} xmm0 = xmm0[0],xmm1[1],xmm0[2],xmm1[3] sched: [1:0.33]
; SKX-NEXT: vpcmpltuq %xmm1, %xmm0, %k1 # sched: [3:1.00]
-; SKX-NEXT: vmovdqa64 {{.*}}(%rip), %xmm0 {%k1} {z} # sched: [7:0.50]
-; SKX-NEXT: vcvtqq2pd %xmm0, %xmm0 # sched: [4:0.33]
+; SKX-NEXT: vpbroadcastd {{.*}}(%rip), %xmm0 {%k1} {z} # sched: [7:0.50]
+; SKX-NEXT: vcvtudq2pd %xmm0, %xmm0 # sched: [5:1.00]
; SKX-NEXT: retq # sched: [7:1.00]
%mask = icmp ult <2 x i32> %a, zeroinitializer
%1 = uitofp <2 x i1> %mask to <2 x double>
OpenPOWER on IntegriCloud