summary refs log tree commit diff stats
path: root/llvm/test/CodeGen/X86/vector-constrained-fp-intrinsics.ll
diff options
context:
space:
mode:
Diffstat (limited to 'llvm/test/CodeGen/X86/vector-constrained-fp-intrinsics.ll')
-rw-r--r-- llvm/test/CodeGen/X86/vector-constrained-fp-intrinsics.ll 342
1 files changed, 168 insertions, 174 deletions
diff --git a/llvm/test/CodeGen/X86/vector-constrained-fp-intrinsics.ll b/llvm/test/CodeGen/X86/vector-constrained-fp-intrinsics.ll
index 29da69a979f..7d5f039b27f 100644
--- a/llvm/test/CodeGen/X86/vector-constrained-fp-intrinsics.ll
+++ b/llvm/test/CodeGen/X86/vector-constrained-fp-intrinsics.ll
@@ -6241,23 +6241,12 @@ entry:
define <2 x double> @constrained_vector_sitofp_v2f64_v2i32(<2 x i32> %x) #0 {
; CHECK-LABEL: constrained_vector_sitofp_v2f64_v2i32:
; CHECK: # %bb.0: # %entry
-; CHECK-NEXT: movd %xmm0, %eax
-; CHECK-NEXT: cvtsi2sd %eax, %xmm1
-; CHECK-NEXT: pshufd {{.*#+}} xmm0 = xmm0[1,1,2,3]
-; CHECK-NEXT: movd %xmm0, %eax
-; CHECK-NEXT: xorps %xmm0, %xmm0
-; CHECK-NEXT: cvtsi2sd %eax, %xmm0
-; CHECK-NEXT: unpcklpd {{.*#+}} xmm1 = xmm1[0],xmm0[0]
-; CHECK-NEXT: movapd %xmm1, %xmm0
+; CHECK-NEXT: cvtdq2pd %xmm0, %xmm0
; CHECK-NEXT: retq
;
; AVX-LABEL: constrained_vector_sitofp_v2f64_v2i32:
; AVX: # %bb.0: # %entry
-; AVX-NEXT: vextractps $1, %xmm0, %eax
-; AVX-NEXT: vcvtsi2sd %eax, %xmm1, %xmm1
-; AVX-NEXT: vmovd %xmm0, %eax
-; AVX-NEXT: vcvtsi2sd %eax, %xmm2, %xmm0
-; AVX-NEXT: vunpcklpd {{.*#+}} xmm0 = xmm0[0],xmm1[0]
+; AVX-NEXT: vcvtdq2pd %xmm0, %xmm0
; AVX-NEXT: retq
entry:
%result = call <2 x double>
@@ -6309,14 +6298,31 @@ define <2 x double> @constrained_vector_sitofp_v2f64_v2i64(<2 x i64> %x) #0 {
; CHECK-NEXT: movapd %xmm1, %xmm0
; CHECK-NEXT: retq
;
-; AVX-LABEL: constrained_vector_sitofp_v2f64_v2i64:
-; AVX: # %bb.0: # %entry
-; AVX-NEXT: vpextrq $1, %xmm0, %rax
-; AVX-NEXT: vcvtsi2sd %rax, %xmm1, %xmm1
-; AVX-NEXT: vmovq %xmm0, %rax
-; AVX-NEXT: vcvtsi2sd %rax, %xmm2, %xmm0
-; AVX-NEXT: vunpcklpd {{.*#+}} xmm0 = xmm0[0],xmm1[0]
-; AVX-NEXT: retq
+; AVX1-LABEL: constrained_vector_sitofp_v2f64_v2i64:
+; AVX1: # %bb.0: # %entry
+; AVX1-NEXT: vpextrq $1, %xmm0, %rax
+; AVX1-NEXT: vcvtsi2sd %rax, %xmm1, %xmm1
+; AVX1-NEXT: vmovq %xmm0, %rax
+; AVX1-NEXT: vcvtsi2sd %rax, %xmm2, %xmm0
+; AVX1-NEXT: vunpcklpd {{.*#+}} xmm0 = xmm0[0],xmm1[0]
+; AVX1-NEXT: retq
+;
+; AVX512F-LABEL: constrained_vector_sitofp_v2f64_v2i64:
+; AVX512F: # %bb.0: # %entry
+; AVX512F-NEXT: vpextrq $1, %xmm0, %rax
+; AVX512F-NEXT: vcvtsi2sd %rax, %xmm1, %xmm1
+; AVX512F-NEXT: vmovq %xmm0, %rax
+; AVX512F-NEXT: vcvtsi2sd %rax, %xmm2, %xmm0
+; AVX512F-NEXT: vunpcklpd {{.*#+}} xmm0 = xmm0[0],xmm1[0]
+; AVX512F-NEXT: retq
+;
+; AVX512DQ-LABEL: constrained_vector_sitofp_v2f64_v2i64:
+; AVX512DQ: # %bb.0: # %entry
+; AVX512DQ-NEXT: # kill: def $xmm0 killed $xmm0 def $zmm0
+; AVX512DQ-NEXT: vcvtqq2pd %zmm0, %zmm0
+; AVX512DQ-NEXT: # kill: def $xmm0 killed $xmm0 killed $zmm0
+; AVX512DQ-NEXT: vzeroupper
+; AVX512DQ-NEXT: retq
entry:
%result = call <2 x double>
@llvm.experimental.constrained.sitofp.v2f64.v2i64(<2 x i64> %x,
@@ -6520,22 +6526,10 @@ entry:
define <4 x double> @constrained_vector_sitofp_v4f64_v4i32(<4 x i32> %x) #0 {
; CHECK-LABEL: constrained_vector_sitofp_v4f64_v4i32:
; CHECK: # %bb.0: # %entry
-; CHECK-NEXT: movd %xmm0, %eax
-; CHECK-NEXT: cvtsi2sd %eax, %xmm2
-; CHECK-NEXT: pshufd {{.*#+}} xmm1 = xmm0[1,1,2,3]
-; CHECK-NEXT: movd %xmm1, %eax
-; CHECK-NEXT: xorps %xmm1, %xmm1
-; CHECK-NEXT: cvtsi2sd %eax, %xmm1
-; CHECK-NEXT: unpcklpd {{.*#+}} xmm2 = xmm2[0],xmm1[0]
-; CHECK-NEXT: pshufd {{.*#+}} xmm1 = xmm0[3,1,2,3]
-; CHECK-NEXT: movd %xmm1, %eax
-; CHECK-NEXT: cvtsi2sd %eax, %xmm3
+; CHECK-NEXT: cvtdq2pd %xmm0, %xmm2
; CHECK-NEXT: pshufd {{.*#+}} xmm0 = xmm0[2,3,0,1]
-; CHECK-NEXT: movd %xmm0, %eax
-; CHECK-NEXT: xorps %xmm1, %xmm1
-; CHECK-NEXT: cvtsi2sd %eax, %xmm1
-; CHECK-NEXT: unpcklpd {{.*#+}} xmm1 = xmm1[0],xmm3[0]
-; CHECK-NEXT: movapd %xmm2, %xmm0
+; CHECK-NEXT: cvtdq2pd %xmm0, %xmm1
+; CHECK-NEXT: movaps %xmm2, %xmm0
; CHECK-NEXT: retq
;
; AVX-LABEL: constrained_vector_sitofp_v4f64_v4i32:
@@ -6605,21 +6599,28 @@ define <4 x double> @constrained_vector_sitofp_v4f64_v4i64(<4 x i64> %x) #0 {
; AVX1-NEXT: vinsertf128 $1, %xmm1, %ymm0, %ymm0
; AVX1-NEXT: retq
;
-; AVX512-LABEL: constrained_vector_sitofp_v4f64_v4i64:
-; AVX512: # %bb.0: # %entry
-; AVX512-NEXT: vextracti128 $1, %ymm0, %xmm1
-; AVX512-NEXT: vpextrq $1, %xmm1, %rax
-; AVX512-NEXT: vcvtsi2sd %rax, %xmm2, %xmm2
-; AVX512-NEXT: vmovq %xmm1, %rax
-; AVX512-NEXT: vcvtsi2sd %rax, %xmm3, %xmm1
-; AVX512-NEXT: vunpcklpd {{.*#+}} xmm1 = xmm1[0],xmm2[0]
-; AVX512-NEXT: vpextrq $1, %xmm0, %rax
-; AVX512-NEXT: vcvtsi2sd %rax, %xmm3, %xmm2
-; AVX512-NEXT: vmovq %xmm0, %rax
-; AVX512-NEXT: vcvtsi2sd %rax, %xmm3, %xmm0
-; AVX512-NEXT: vunpcklpd {{.*#+}} xmm0 = xmm0[0],xmm2[0]
-; AVX512-NEXT: vinsertf128 $1, %xmm1, %ymm0, %ymm0
-; AVX512-NEXT: retq
+; AVX512F-LABEL: constrained_vector_sitofp_v4f64_v4i64:
+; AVX512F: # %bb.0: # %entry
+; AVX512F-NEXT: vextracti128 $1, %ymm0, %xmm1
+; AVX512F-NEXT: vpextrq $1, %xmm1, %rax
+; AVX512F-NEXT: vcvtsi2sd %rax, %xmm2, %xmm2
+; AVX512F-NEXT: vmovq %xmm1, %rax
+; AVX512F-NEXT: vcvtsi2sd %rax, %xmm3, %xmm1
+; AVX512F-NEXT: vunpcklpd {{.*#+}} xmm1 = xmm1[0],xmm2[0]
+; AVX512F-NEXT: vpextrq $1, %xmm0, %rax
+; AVX512F-NEXT: vcvtsi2sd %rax, %xmm3, %xmm2
+; AVX512F-NEXT: vmovq %xmm0, %rax
+; AVX512F-NEXT: vcvtsi2sd %rax, %xmm3, %xmm0
+; AVX512F-NEXT: vunpcklpd {{.*#+}} xmm0 = xmm0[0],xmm2[0]
+; AVX512F-NEXT: vinsertf128 $1, %xmm1, %ymm0, %ymm0
+; AVX512F-NEXT: retq
+;
+; AVX512DQ-LABEL: constrained_vector_sitofp_v4f64_v4i64:
+; AVX512DQ: # %bb.0: # %entry
+; AVX512DQ-NEXT: # kill: def $ymm0 killed $ymm0 def $zmm0
+; AVX512DQ-NEXT: vcvtqq2pd %zmm0, %zmm0
+; AVX512DQ-NEXT: # kill: def $ymm0 killed $ymm0 killed $zmm0
+; AVX512DQ-NEXT: retq
entry:
%result = call <4 x double>
@llvm.experimental.constrained.sitofp.v4f64.v4i64(<4 x i64> %x,
@@ -6667,22 +6668,30 @@ define <4 x float> @constrained_vector_sitofp_v4f32_v4i64(<4 x i64> %x) #0 {
; AVX1-NEXT: vzeroupper
; AVX1-NEXT: retq
;
-; AVX512-LABEL: constrained_vector_sitofp_v4f32_v4i64:
-; AVX512: # %bb.0: # %entry
-; AVX512-NEXT: vpextrq $1, %xmm0, %rax
-; AVX512-NEXT: vcvtsi2ss %rax, %xmm1, %xmm1
-; AVX512-NEXT: vmovq %xmm0, %rax
-; AVX512-NEXT: vcvtsi2ss %rax, %xmm2, %xmm2
-; AVX512-NEXT: vinsertps {{.*#+}} xmm1 = xmm2[0],xmm1[0],xmm2[2,3]
-; AVX512-NEXT: vextracti128 $1, %ymm0, %xmm0
-; AVX512-NEXT: vmovq %xmm0, %rax
-; AVX512-NEXT: vcvtsi2ss %rax, %xmm3, %xmm2
-; AVX512-NEXT: vinsertps {{.*#+}} xmm1 = xmm1[0,1],xmm2[0],xmm1[3]
-; AVX512-NEXT: vpextrq $1, %xmm0, %rax
-; AVX512-NEXT: vcvtsi2ss %rax, %xmm3, %xmm0
-; AVX512-NEXT: vinsertps {{.*#+}} xmm0 = xmm1[0,1,2],xmm0[0]
-; AVX512-NEXT: vzeroupper
-; AVX512-NEXT: retq
+; AVX512F-LABEL: constrained_vector_sitofp_v4f32_v4i64:
+; AVX512F: # %bb.0: # %entry
+; AVX512F-NEXT: vpextrq $1, %xmm0, %rax
+; AVX512F-NEXT: vcvtsi2ss %rax, %xmm1, %xmm1
+; AVX512F-NEXT: vmovq %xmm0, %rax
+; AVX512F-NEXT: vcvtsi2ss %rax, %xmm2, %xmm2
+; AVX512F-NEXT: vinsertps {{.*#+}} xmm1 = xmm2[0],xmm1[0],xmm2[2,3]
+; AVX512F-NEXT: vextracti128 $1, %ymm0, %xmm0
+; AVX512F-NEXT: vmovq %xmm0, %rax
+; AVX512F-NEXT: vcvtsi2ss %rax, %xmm3, %xmm2
+; AVX512F-NEXT: vinsertps {{.*#+}} xmm1 = xmm1[0,1],xmm2[0],xmm1[3]
+; AVX512F-NEXT: vpextrq $1, %xmm0, %rax
+; AVX512F-NEXT: vcvtsi2ss %rax, %xmm3, %xmm0
+; AVX512F-NEXT: vinsertps {{.*#+}} xmm0 = xmm1[0,1,2],xmm0[0]
+; AVX512F-NEXT: vzeroupper
+; AVX512F-NEXT: retq
+;
+; AVX512DQ-LABEL: constrained_vector_sitofp_v4f32_v4i64:
+; AVX512DQ: # %bb.0: # %entry
+; AVX512DQ-NEXT: # kill: def $ymm0 killed $ymm0 def $zmm0
+; AVX512DQ-NEXT: vcvtqq2ps %zmm0, %ymm0
+; AVX512DQ-NEXT: # kill: def $xmm0 killed $xmm0 killed $ymm0
+; AVX512DQ-NEXT: vzeroupper
+; AVX512DQ-NEXT: retq
entry:
%result = call <4 x float>
@llvm.experimental.constrained.sitofp.v4f32.v4i64(<4 x i64> %x,
@@ -6821,32 +6830,32 @@ entry:
define <2 x double> @constrained_vector_uitofp_v2f64_v2i32(<2 x i32> %x) #0 {
; CHECK-LABEL: constrained_vector_uitofp_v2f64_v2i32:
; CHECK: # %bb.0: # %entry
-; CHECK-NEXT: movd %xmm0, %eax
-; CHECK-NEXT: cvtsi2sd %rax, %xmm1
-; CHECK-NEXT: pshufd {{.*#+}} xmm0 = xmm0[1,1,2,3]
-; CHECK-NEXT: movd %xmm0, %eax
-; CHECK-NEXT: xorps %xmm0, %xmm0
-; CHECK-NEXT: cvtsi2sd %rax, %xmm0
-; CHECK-NEXT: unpcklpd {{.*#+}} xmm1 = xmm1[0],xmm0[0]
-; CHECK-NEXT: movapd %xmm1, %xmm0
+; CHECK-NEXT: movdqa {{.*#+}} xmm1 = [65535,0,65535,0,65535,0,65535,0]
+; CHECK-NEXT: pand %xmm0, %xmm1
+; CHECK-NEXT: cvtdq2pd %xmm1, %xmm1
+; CHECK-NEXT: psrld $16, %xmm0
+; CHECK-NEXT: cvtdq2pd %xmm0, %xmm0
+; CHECK-NEXT: mulpd {{.*}}(%rip), %xmm0
+; CHECK-NEXT: addpd %xmm1, %xmm0
; CHECK-NEXT: retq
;
; AVX1-LABEL: constrained_vector_uitofp_v2f64_v2i32:
; AVX1: # %bb.0: # %entry
-; AVX1-NEXT: vextractps $1, %xmm0, %eax
-; AVX1-NEXT: vcvtsi2sd %rax, %xmm1, %xmm1
-; AVX1-NEXT: vmovd %xmm0, %eax
-; AVX1-NEXT: vcvtsi2sd %rax, %xmm2, %xmm0
-; AVX1-NEXT: vunpcklpd {{.*#+}} xmm0 = xmm0[0],xmm1[0]
+; AVX1-NEXT: vpxor %xmm1, %xmm1, %xmm1
+; AVX1-NEXT: vpblendw {{.*#+}} xmm1 = xmm0[0],xmm1[1],xmm0[2],xmm1[3],xmm0[4],xmm1[5],xmm0[6],xmm1[7]
+; AVX1-NEXT: vcvtdq2pd %xmm1, %xmm1
+; AVX1-NEXT: vpsrld $16, %xmm0, %xmm0
+; AVX1-NEXT: vcvtdq2pd %xmm0, %xmm0
+; AVX1-NEXT: vmulpd {{.*}}(%rip), %xmm0, %xmm0
+; AVX1-NEXT: vaddpd %xmm1, %xmm0, %xmm0
; AVX1-NEXT: retq
;
; AVX512-LABEL: constrained_vector_uitofp_v2f64_v2i32:
; AVX512: # %bb.0: # %entry
-; AVX512-NEXT: vextractps $1, %xmm0, %eax
-; AVX512-NEXT: vcvtusi2sd %eax, %xmm1, %xmm1
-; AVX512-NEXT: vmovd %xmm0, %eax
-; AVX512-NEXT: vcvtusi2sd %eax, %xmm2, %xmm0
-; AVX512-NEXT: vunpcklpd {{.*#+}} xmm0 = xmm0[0],xmm1[0]
+; AVX512-NEXT: # kill: def $xmm0 killed $xmm0 def $ymm0
+; AVX512-NEXT: vcvtudq2pd %ymm0, %zmm0
+; AVX512-NEXT: # kill: def $xmm0 killed $xmm0 killed $zmm0
+; AVX512-NEXT: vzeroupper
; AVX512-NEXT: retq
entry:
%result = call <2 x double>
@@ -6917,16 +6926,24 @@ define <2 x double> @constrained_vector_uitofp_v2f64_v2i64(<2 x i64> %x) #0 {
; AVX1-NEXT: vaddpd %xmm0, %xmm1, %xmm0
; AVX1-NEXT: retq
;
-; AVX512-LABEL: constrained_vector_uitofp_v2f64_v2i64:
-; AVX512: # %bb.0: # %entry
-; AVX512-NEXT: vpxor %xmm1, %xmm1, %xmm1
-; AVX512-NEXT: vpblendd {{.*#+}} xmm1 = xmm0[0],xmm1[1],xmm0[2],xmm1[3]
-; AVX512-NEXT: vpor {{.*}}(%rip), %xmm1, %xmm1
-; AVX512-NEXT: vpsrlq $32, %xmm0, %xmm0
-; AVX512-NEXT: vpor {{.*}}(%rip), %xmm0, %xmm0
-; AVX512-NEXT: vsubpd {{.*}}(%rip), %xmm0, %xmm0
-; AVX512-NEXT: vaddpd %xmm0, %xmm1, %xmm0
-; AVX512-NEXT: retq
+; AVX512F-LABEL: constrained_vector_uitofp_v2f64_v2i64:
+; AVX512F: # %bb.0: # %entry
+; AVX512F-NEXT: vpxor %xmm1, %xmm1, %xmm1
+; AVX512F-NEXT: vpblendd {{.*#+}} xmm1 = xmm0[0],xmm1[1],xmm0[2],xmm1[3]
+; AVX512F-NEXT: vpor {{.*}}(%rip), %xmm1, %xmm1
+; AVX512F-NEXT: vpsrlq $32, %xmm0, %xmm0
+; AVX512F-NEXT: vpor {{.*}}(%rip), %xmm0, %xmm0
+; AVX512F-NEXT: vsubpd {{.*}}(%rip), %xmm0, %xmm0
+; AVX512F-NEXT: vaddpd %xmm0, %xmm1, %xmm0
+; AVX512F-NEXT: retq
+;
+; AVX512DQ-LABEL: constrained_vector_uitofp_v2f64_v2i64:
+; AVX512DQ: # %bb.0: # %entry
+; AVX512DQ-NEXT: # kill: def $xmm0 killed $xmm0 def $zmm0
+; AVX512DQ-NEXT: vcvtuqq2pd %zmm0, %zmm0
+; AVX512DQ-NEXT: # kill: def $xmm0 killed $xmm0 killed $zmm0
+; AVX512DQ-NEXT: vzeroupper
+; AVX512DQ-NEXT: retq
entry:
%result = call <2 x double>
@llvm.experimental.constrained.uitofp.v2f64.v2i64(<2 x i64> %x,
@@ -7300,22 +7317,22 @@ entry:
define <4 x double> @constrained_vector_uitofp_v4f64_v4i32(<4 x i32> %x) #0 {
; CHECK-LABEL: constrained_vector_uitofp_v4f64_v4i32:
; CHECK: # %bb.0: # %entry
-; CHECK-NEXT: movd %xmm0, %eax
-; CHECK-NEXT: cvtsi2sd %rax, %xmm2
-; CHECK-NEXT: pshufd {{.*#+}} xmm1 = xmm0[1,1,2,3]
-; CHECK-NEXT: movd %xmm1, %eax
-; CHECK-NEXT: xorps %xmm1, %xmm1
-; CHECK-NEXT: cvtsi2sd %rax, %xmm1
-; CHECK-NEXT: unpcklpd {{.*#+}} xmm2 = xmm2[0],xmm1[0]
-; CHECK-NEXT: pshufd {{.*#+}} xmm1 = xmm0[3,1,2,3]
-; CHECK-NEXT: movd %xmm1, %eax
-; CHECK-NEXT: cvtsi2sd %rax, %xmm3
-; CHECK-NEXT: pshufd {{.*#+}} xmm0 = xmm0[2,3,0,1]
-; CHECK-NEXT: movd %xmm0, %eax
-; CHECK-NEXT: xorps %xmm1, %xmm1
-; CHECK-NEXT: cvtsi2sd %rax, %xmm1
-; CHECK-NEXT: unpcklpd {{.*#+}} xmm1 = xmm1[0],xmm3[0]
-; CHECK-NEXT: movapd %xmm2, %xmm0
+; CHECK-NEXT: movdqa %xmm0, %xmm1
+; CHECK-NEXT: psrld $16, %xmm1
+; CHECK-NEXT: cvtdq2pd %xmm1, %xmm1
+; CHECK-NEXT: movapd {{.*#+}} xmm2 = [6.5536E+4,6.5536E+4]
+; CHECK-NEXT: mulpd %xmm2, %xmm1
+; CHECK-NEXT: pshufd {{.*#+}} xmm3 = xmm0[2,3,0,1]
+; CHECK-NEXT: pand {{.*}}(%rip), %xmm0
+; CHECK-NEXT: cvtdq2pd %xmm0, %xmm0
+; CHECK-NEXT: addpd %xmm1, %xmm0
+; CHECK-NEXT: movdqa %xmm3, %xmm1
+; CHECK-NEXT: psrld $16, %xmm1
+; CHECK-NEXT: cvtdq2pd %xmm1, %xmm4
+; CHECK-NEXT: mulpd %xmm2, %xmm4
+; CHECK-NEXT: pand {{.*}}(%rip), %xmm3
+; CHECK-NEXT: cvtdq2pd %xmm3, %xmm1
+; CHECK-NEXT: addpd %xmm4, %xmm1
; CHECK-NEXT: retq
;
; AVX1-LABEL: constrained_vector_uitofp_v4f64_v4i32:
@@ -7331,14 +7348,9 @@ define <4 x double> @constrained_vector_uitofp_v4f64_v4i32(<4 x i32> %x) #0 {
;
; AVX512-LABEL: constrained_vector_uitofp_v4f64_v4i32:
; AVX512: # %bb.0: # %entry
-; AVX512-NEXT: vpsrld $16, %xmm0, %xmm1
-; AVX512-NEXT: vcvtdq2pd %xmm1, %ymm1
-; AVX512-NEXT: vbroadcastsd {{.*#+}} ymm2 = [6.5536E+4,6.5536E+4,6.5536E+4,6.5536E+4]
-; AVX512-NEXT: vmulpd %ymm2, %ymm1, %ymm1
-; AVX512-NEXT: vxorpd %xmm2, %xmm2, %xmm2
-; AVX512-NEXT: vpblendw {{.*#+}} xmm0 = xmm0[0],xmm2[1],xmm0[2],xmm2[3],xmm0[4],xmm2[5],xmm0[6],xmm2[7]
-; AVX512-NEXT: vcvtdq2pd %xmm0, %ymm0
-; AVX512-NEXT: vaddpd %ymm0, %ymm1, %ymm0
+; AVX512-NEXT: # kill: def $xmm0 killed $xmm0 def $ymm0
+; AVX512-NEXT: vcvtudq2pd %ymm0, %zmm0
+; AVX512-NEXT: # kill: def $ymm0 killed $ymm0 killed $zmm0
; AVX512-NEXT: retq
entry:
%result = call <4 x double>
@@ -7351,36 +7363,30 @@ entry:
define <4 x float> @constrained_vector_uitofp_v4f32_v4i32(<4 x i32> %x) #0 {
; CHECK-LABEL: constrained_vector_uitofp_v4f32_v4i32:
; CHECK: # %bb.0: # %entry
-; CHECK-NEXT: movdqa %xmm0, %xmm1
-; CHECK-NEXT: psrld $16, %xmm1
-; CHECK-NEXT: cvtdq2ps %xmm1, %xmm1
-; CHECK-NEXT: mulps {{.*}}(%rip), %xmm1
-; CHECK-NEXT: pand {{.*}}(%rip), %xmm0
-; CHECK-NEXT: cvtdq2ps %xmm0, %xmm0
+; CHECK-NEXT: movdqa {{.*#+}} xmm1 = [65535,65535,65535,65535]
+; CHECK-NEXT: pand %xmm0, %xmm1
+; CHECK-NEXT: por {{.*}}(%rip), %xmm1
+; CHECK-NEXT: psrld $16, %xmm0
+; CHECK-NEXT: por {{.*}}(%rip), %xmm0
+; CHECK-NEXT: addps {{.*}}(%rip), %xmm0
; CHECK-NEXT: addps %xmm1, %xmm0
; CHECK-NEXT: retq
;
; AVX1-LABEL: constrained_vector_uitofp_v4f32_v4i32:
; AVX1: # %bb.0: # %entry
-; AVX1-NEXT: vpxor %xmm1, %xmm1, %xmm1
-; AVX1-NEXT: vpblendw {{.*#+}} xmm1 = xmm0[0],xmm1[1],xmm0[2],xmm1[3],xmm0[4],xmm1[5],xmm0[6],xmm1[7]
+; AVX1-NEXT: vpblendw {{.*#+}} xmm1 = xmm0[0],mem[1],xmm0[2],mem[3],xmm0[4],mem[5],xmm0[6],mem[7]
; AVX1-NEXT: vpsrld $16, %xmm0, %xmm0
-; AVX1-NEXT: vcvtdq2ps %xmm0, %xmm0
-; AVX1-NEXT: vmulps {{.*}}(%rip), %xmm0, %xmm0
-; AVX1-NEXT: vcvtdq2ps %xmm1, %xmm1
-; AVX1-NEXT: vaddps %xmm1, %xmm0, %xmm0
+; AVX1-NEXT: vpblendw {{.*#+}} xmm0 = xmm0[0],mem[1],xmm0[2],mem[3],xmm0[4],mem[5],xmm0[6],mem[7]
+; AVX1-NEXT: vaddps {{.*}}(%rip), %xmm0, %xmm0
+; AVX1-NEXT: vaddps %xmm0, %xmm1, %xmm0
; AVX1-NEXT: retq
;
; AVX512-LABEL: constrained_vector_uitofp_v4f32_v4i32:
; AVX512: # %bb.0: # %entry
-; AVX512-NEXT: vpsrld $16, %xmm0, %xmm1
-; AVX512-NEXT: vcvtdq2ps %xmm1, %xmm1
-; AVX512-NEXT: vbroadcastss {{.*#+}} xmm2 = [6.5536E+4,6.5536E+4,6.5536E+4,6.5536E+4]
-; AVX512-NEXT: vmulps %xmm2, %xmm1, %xmm1
-; AVX512-NEXT: vxorps %xmm2, %xmm2, %xmm2
-; AVX512-NEXT: vpblendw {{.*#+}} xmm0 = xmm0[0],xmm2[1],xmm0[2],xmm2[3],xmm0[4],xmm2[5],xmm0[6],xmm2[7]
-; AVX512-NEXT: vcvtdq2ps %xmm0, %xmm0
-; AVX512-NEXT: vaddps %xmm0, %xmm1, %xmm0
+; AVX512-NEXT: # kill: def $xmm0 killed $xmm0 def $zmm0
+; AVX512-NEXT: vcvtudq2ps %zmm0, %zmm0
+; AVX512-NEXT: # kill: def $xmm0 killed $xmm0 killed $zmm0
+; AVX512-NEXT: vzeroupper
; AVX512-NEXT: retq
entry:
%result = call <4 x float>
@@ -7426,19 +7432,26 @@ define <4 x double> @constrained_vector_uitofp_v4f64_v4i64(<4 x i64> %x) #0 {
; AVX1-NEXT: vaddpd %ymm0, %ymm1, %ymm0
; AVX1-NEXT: retq
;
-; AVX512-LABEL: constrained_vector_uitofp_v4f64_v4i64:
-; AVX512: # %bb.0: # %entry
-; AVX512-NEXT: vpxor %xmm1, %xmm1, %xmm1
-; AVX512-NEXT: vpblendd {{.*#+}} ymm1 = ymm0[0],ymm1[1],ymm0[2],ymm1[3],ymm0[4],ymm1[5],ymm0[6],ymm1[7]
-; AVX512-NEXT: vpbroadcastq {{.*#+}} ymm2 = [4841369599423283200,4841369599423283200,4841369599423283200,4841369599423283200]
-; AVX512-NEXT: vpor %ymm2, %ymm1, %ymm1
-; AVX512-NEXT: vpsrlq $32, %ymm0, %ymm0
-; AVX512-NEXT: vpbroadcastq {{.*#+}} ymm2 = [4985484787499139072,4985484787499139072,4985484787499139072,4985484787499139072]
-; AVX512-NEXT: vpor %ymm2, %ymm0, %ymm0
-; AVX512-NEXT: vbroadcastsd {{.*#+}} ymm2 = [1.9342813118337666E+25,1.9342813118337666E+25,1.9342813118337666E+25,1.9342813118337666E+25]
-; AVX512-NEXT: vsubpd %ymm2, %ymm0, %ymm0
-; AVX512-NEXT: vaddpd %ymm0, %ymm1, %ymm0
-; AVX512-NEXT: retq
+; AVX512F-LABEL: constrained_vector_uitofp_v4f64_v4i64:
+; AVX512F: # %bb.0: # %entry
+; AVX512F-NEXT: vpxor %xmm1, %xmm1, %xmm1
+; AVX512F-NEXT: vpblendd {{.*#+}} ymm1 = ymm0[0],ymm1[1],ymm0[2],ymm1[3],ymm0[4],ymm1[5],ymm0[6],ymm1[7]
+; AVX512F-NEXT: vpbroadcastq {{.*#+}} ymm2 = [4841369599423283200,4841369599423283200,4841369599423283200,4841369599423283200]
+; AVX512F-NEXT: vpor %ymm2, %ymm1, %ymm1
+; AVX512F-NEXT: vpsrlq $32, %ymm0, %ymm0
+; AVX512F-NEXT: vpbroadcastq {{.*#+}} ymm2 = [4985484787499139072,4985484787499139072,4985484787499139072,4985484787499139072]
+; AVX512F-NEXT: vpor %ymm2, %ymm0, %ymm0
+; AVX512F-NEXT: vbroadcastsd {{.*#+}} ymm2 = [1.9342813118337666E+25,1.9342813118337666E+25,1.9342813118337666E+25,1.9342813118337666E+25]
+; AVX512F-NEXT: vsubpd %ymm2, %ymm0, %ymm0
+; AVX512F-NEXT: vaddpd %ymm0, %ymm1, %ymm0
+; AVX512F-NEXT: retq
+;
+; AVX512DQ-LABEL: constrained_vector_uitofp_v4f64_v4i64:
+; AVX512DQ: # %bb.0: # %entry
+; AVX512DQ-NEXT: # kill: def $ymm0 killed $ymm0 def $zmm0
+; AVX512DQ-NEXT: vcvtuqq2pd %zmm0, %zmm0
+; AVX512DQ-NEXT: # kill: def $ymm0 killed $ymm0 killed $zmm0
+; AVX512DQ-NEXT: retq
entry:
%result = call <4 x double>
@llvm.experimental.constrained.uitofp.v4f64.v4i64(<4 x i64> %x,
@@ -7592,28 +7605,9 @@ define <4 x float> @constrained_vector_uitofp_v4f32_v4i64(<4 x i64> %x) #0 {
;
; AVX512DQ-LABEL: constrained_vector_uitofp_v4f32_v4i64:
; AVX512DQ: # %bb.0: # %entry
-; AVX512DQ-NEXT: vpxor %xmm1, %xmm1, %xmm1
-; AVX512DQ-NEXT: vpcmpgtq %ymm0, %ymm1, %ymm1
-; AVX512DQ-NEXT: vpbroadcastq {{.*#+}} ymm2 = [1,1,1,1]
-; AVX512DQ-NEXT: vpand %ymm2, %ymm0, %ymm2
-; AVX512DQ-NEXT: vpsrlq $1, %ymm0, %ymm3
-; AVX512DQ-NEXT: vpor %ymm3, %ymm2, %ymm2
-; AVX512DQ-NEXT: vblendvpd %ymm1, %ymm2, %ymm0, %ymm0
-; AVX512DQ-NEXT: vpextrq $1, %xmm0, %rax
-; AVX512DQ-NEXT: vcvtsi2ss %rax, %xmm4, %xmm2
-; AVX512DQ-NEXT: vmovq %xmm0, %rax
-; AVX512DQ-NEXT: vcvtsi2ss %rax, %xmm4, %xmm3
-; AVX512DQ-NEXT: vinsertps {{.*#+}} xmm2 = xmm3[0],xmm2[0],xmm3[2,3]
-; AVX512DQ-NEXT: vextracti128 $1, %ymm0, %xmm0
-; AVX512DQ-NEXT: vmovq %xmm0, %rax
-; AVX512DQ-NEXT: vcvtsi2ss %rax, %xmm4, %xmm3
-; AVX512DQ-NEXT: vinsertps {{.*#+}} xmm2 = xmm2[0,1],xmm3[0],xmm2[3]
-; AVX512DQ-NEXT: vpextrq $1, %xmm0, %rax
-; AVX512DQ-NEXT: vcvtsi2ss %rax, %xmm4, %xmm0
-; AVX512DQ-NEXT: vinsertps {{.*#+}} xmm0 = xmm2[0,1,2],xmm0[0]
-; AVX512DQ-NEXT: vaddps %xmm0, %xmm0, %xmm2
-; AVX512DQ-NEXT: vpmovqd %zmm1, %ymm1
-; AVX512DQ-NEXT: vblendvps %xmm1, %xmm2, %xmm0, %xmm0
+; AVX512DQ-NEXT: # kill: def $ymm0 killed $ymm0 def $zmm0
+; AVX512DQ-NEXT: vcvtuqq2ps %zmm0, %ymm0
+; AVX512DQ-NEXT: # kill: def $xmm0 killed $xmm0 killed $ymm0
; AVX512DQ-NEXT: vzeroupper
; AVX512DQ-NEXT: retq
entry:
OpenPOWER on IntegriCloud